Use of org.apache.lucene.search.ScoreDoc in project searchcode-server by boyter.
In class CodeSearcher, method getByCodeId.
/**
 * Only used as a fallback if getByRepoFileName fails for some reason, due to what appears to be a Lucene index bug.
 * This should always work, as the path used is a SHA1 hash and should be unique for anything the current codebase
 * can deal with.
 */
public CodeResult getByCodeId(String codeId) {
    CodeResult codeResult = null;
    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.CODEID + ":" + QueryParser.escape(codeId));
        Singleton.getLogger().info("Query to get by " + Values.CODEID + ":" + QueryParser.escape(codeId));
        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;
        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);
            String filepath = doc.get(Values.PATH);
            List<String> code = new ArrayList<>();
            try {
                code = Singleton.getHelpers().readFileLinesGuessEncoding(filepath, Singleton.getHelpers().tryParseInt(Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH, Values.DEFAULTMAXFILELINEDEPTH), Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                Singleton.getLogger().info("Indexed file appears to be binary: " + filepath);
}
codeResult = new CodeResult(code, null);
codeResult.setFilePath(filepath);
codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
codeResult.setFileName(doc.get(Values.FILENAME));
codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
codeResult.setMd5hash(doc.get(Values.MD5HASH));
codeResult.setCodeLines(doc.get(Values.CODELINES));
codeResult.setDocumentId(hits[0].doc);
codeResult.setRepoName(doc.get(Values.REPONAME));
codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
codeResult.setCodeId(doc.get(Values.CODEID));
}
reader.close();
} catch (Exception ex) {
LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
}
return codeResult;
}
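One detail worth noting in the snippet above is that reader.close() is only reached on the success path; if parsing or searching throws, the reader is never closed explicitly. Below is a minimal, hedged sketch of the same lookup-by-unique-field idea using try-with-resources so the reader is always released. The field name "codeid", the class name, and the use of a TermQuery are illustrative assumptions, not code from searchcode-server.

import java.nio.file.Path;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;

class UniqueFieldLookup {

    // Fetch the single document whose "codeid" field equals the given value, or null if absent.
    // A TermQuery matches the exact indexed token, which suits a SHA1-style unique identifier.
    static Document findByUniqueField(Path indexPath, String codeId) throws Exception {
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(indexPath))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("codeid", codeId));
            ScoreDoc[] hits = searcher.search(query, 1).scoreDocs;
            return hits.length == 0 ? null : searcher.doc(hits[0].doc);
        } // the reader is closed here even if the search throws
    }
}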
Use of org.apache.lucene.search.ScoreDoc in project Anserini by castorini.
In class TweetSearcherAPI, method search.
@POST
@Path("search")
@Produces(MediaType.APPLICATION_JSON)
public List<SearchResult> search(SearchAPIQuery query) {
    try {
        Query q = new QueryParser(TweetStreamIndexer.StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(query.getQuery());
        try {
            reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        int topN = query.getCount();
        TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        List<SearchResult> resultHits = new ArrayList<>();
        for (int i = 0; i < hits.length && i < topN; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            resultHits.add(new SearchResult(String.valueOf(d.get(TweetStreamIndexer.StatusField.ID.name))));
        }
        return resultHits;
    } catch (Exception e) {
        e.printStackTrace();
        return new ArrayList<>();
    }
}
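The part of this snippet that goes beyond plain ScoreDoc iteration is the near-real-time reader handling: DirectoryReader.openIfChanged returns a new reader only when the IndexWriter has fresh changes and returns null otherwise, so the old reader keeps serving searches when nothing has changed. A rough self-contained sketch of that refresh-then-search pattern follows; the class name, field names, and TermQuery are placeholders for illustration, not Anserini code.

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;

class NearRealTimeSearch {

    private final IndexWriter writer;  // the writer that is still receiving new documents
    private DirectoryReader reader;

    NearRealTimeSearch(IndexWriter writer) throws Exception {
        this.writer = writer;
        this.reader = DirectoryReader.open(writer);  // initial near-real-time reader
    }

    // Refresh the reader only if the writer has new changes, then run a simple top-N term search.
    ScoreDoc[] searchText(String field, String text, int topN) throws Exception {
        DirectoryReader newReader = DirectoryReader.openIfChanged(reader, writer);
        if (newReader != null) {  // null means nothing changed; keep using the old reader
            reader.close();
            reader = newReader;
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        return searcher.search(new TermQuery(new Term(field, text)), topN).scoreDocs;
    }
}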
Use of org.apache.lucene.search.ScoreDoc in project Anserini by castorini.
In class TweetServlet, method doGet.
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (request.getRequestURI().equals("/search")) {
        response.setStatus(HttpServletResponse.SC_OK);
        response.setContentType("text/html");
        request.setCharacterEncoding("UTF-8");
        Query q;
        try {
            q = new QueryParser(StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(request.getParameter("query"));
            try {
                reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
            if (newReader != null) {
                reader.close();
                reader = newReader;
            }
            IndexSearcher searcher = new IndexSearcher(reader);
            int topN;
            if (request.getParameter("top") != null) {
                topN = Integer.parseInt(request.getParameter("top"));
            } else {
                // TODO configurable; default (parameter unspecified in URL) topN = 20
                topN = 20;
            }
            TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            TweetHits tweetHits = new TweetHits(request.getParameter("query"), hits.length);
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                tweetHits.addHit(i, String.valueOf(d.get(StatusField.ID.name)));
            }
            MustacheFactory mf = new DefaultMustacheFactory();
            Mustache mustache = mf.compile(MustacheTemplatePath);
            mustache.execute(response.getWriter(), tweetHits).flush();
        } catch (ParseException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    } else {
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
    }
}
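The servlet parses the optional top parameter by hand and falls back to a hard-coded 20, as the TODO notes. Below is a small hedged sketch of how that parsing could be centralised with a default and an upper bound; the parameter name mirrors the code above, while the helper name and bounds are illustrative assumptions rather than Anserini code.

import javax.servlet.http.HttpServletRequest;

class RequestParams {

    // Parse an optional positive integer parameter, falling back to a default and
    // capping the value so malformed or oversized input cannot break the search call.
    static int parseTopN(HttpServletRequest request, int defaultTopN, int maxTopN) {
        String raw = request.getParameter("top");
        if (raw == null || raw.isEmpty()) {
            return defaultTopN;
        }
        try {
            int parsed = Integer.parseInt(raw);
            return Math.min(Math.max(parsed, 1), maxTopN);
        } catch (NumberFormatException e) {
            return defaultTopN;
        }
    }
}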
Use of org.apache.lucene.search.ScoreDoc in project Anserini by castorini.
In class BaseFeatureExtractor, method printFeatures.
/**
 * Prints feature vectors with respect to the qrels, one vector per qrel.
 * @param out the stream to write feature vectors to
 * @throws IOException
 */
public void printFeatures(PrintStream out) throws IOException {
    Map<String, RerankerContext> queryContextMap = buildRerankerContextMap();
    FeatureExtractors extractors = getExtractors();
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Set<String> fieldsToLoad = getFieldsToLoad();
    // We need to open a searcher
    IndexSearcher searcher = new IndexSearcher(reader);
    this.printHeader(out, extractors);
    // Iterate through all the qrels and for each document id we have for them
    LOG.debug("Processing queries");
    for (String qid : this.qrels.getQids()) {
        LOG.debug(String.format("Processing qid: %s", qid));
        // Get the map of documents
        RerankerContext context = queryContextMap.get(qid);
        for (Map.Entry<String, Integer> entry : this.qrels.getDocMap(qid).entrySet()) {
            String docId = entry.getKey();
            int qrelScore = entry.getValue();
            // We issue a specific query
            TopDocs topDocs = searcher.search(docIdQuery(docId), 1);
            if (topDocs.totalHits == 0) {
                LOG.warn(String.format("Document Id %s expected but not found in index, skipping...", docId));
                continue;
            }
            ScoreDoc hit = topDocs.scoreDocs[0];
            Document doc = reader.document(hit.doc, fieldsToLoad);
            // TODO factor for test
            Terms terms = reader.getTermVector(hit.doc, getTermVectorField());
            if (terms == null) {
                LOG.debug(String.format("No term vectors found for doc %s, qid %s", docId, qid));
                continue;
            }
            float[] featureValues = extractors.extractAll(doc, terms, context);
            writeFeatureVector(out, qid, qrelScore, docId, featureValues);
        }
        LOG.debug(String.format("Finished processing for qid: %s", qid));
        out.flush();
    }
}
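printFeatures depends on a helper, docIdQuery(docId), that turns an external collection document id into a Lucene query expected to match exactly one indexed document, which is why the search is limited to a single hit. That helper is defined elsewhere in Anserini and its exact shape may differ; a plausible minimal version, assuming the external id is stored in an indexed string field named "docid", could look like this (an illustrative guess only):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

// Illustrative guess only: the real Anserini field name and query type may differ.
static Query docIdQuery(String docId) {
    return new TermQuery(new Term("docid", docId));
}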
Use of org.apache.lucene.search.ScoreDoc in project ddf by codice.
In class GeoNamesQueryLuceneIndex, method doGetNearestCities.
protected List<NearbyLocation> doGetNearestCities(final Shape shape, final int radiusInKm, final int maxResults, final Directory directory) throws GeoEntryQueryException {
    notNull(shape, "GeoNamesQueryLuceneIndex.doGetNearestCities(): argument 'shape' may not be null.");
    if (radiusInKm <= 0) {
        throw new IllegalArgumentException("GeoNamesQueryLuceneIndex.doGetNearestCities(): radiusInKm must be positive.");
    }
    if (maxResults <= 0) {
        throw new IllegalArgumentException("GeoNamesQueryLuceneIndex.doGetNearestCities(): maxResults must be positive.");
    }
    if (directory == null) {
        return Collections.emptyList();
    }
    try (final IndexReader indexReader = createIndexReader(directory)) {
        final IndexSearcher indexSearcher = createIndexSearcher(indexReader);
        final List<NearbyLocation> closestCities = new ArrayList<>();
        final Point center = shape.getCenter();
        final Query filter = createSpatialQuery(center, radiusInKm);
        // Query for all the documents in the index that are cities, then filter those
        // results for the ones that are in the search area.
        final BooleanQuery booleanQuery = new BooleanQuery.Builder().add(PPL_QUERY, BooleanClause.Occur.MUST).add(filter, BooleanClause.Occur.FILTER).build();
        final TopDocs topDocs = indexSearcher.search(booleanQuery, maxResults, SORT);
        if (topDocs.totalHits > 0) {
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                final double lat = Double.parseDouble(indexSearcher.doc(scoreDoc.doc).get(GeoNamesLuceneConstants.LATITUDE_FIELD));
                final double lon = Double.parseDouble(indexSearcher.doc(scoreDoc.doc).get(GeoNamesLuceneConstants.LONGITUDE_FIELD));
                final String name = indexSearcher.doc(scoreDoc.doc).get(GeoNamesLuceneConstants.NAME_FIELD);
                final NearbyLocation city = new NearbyLocationImpl(center, new PointImpl(lon, lat, SPATIAL_CONTEXT), name);
                closestCities.add(city);
            }
        }
        return closestCities;
    } catch (IOException e) {
        throw new GeoEntryQueryException("Error reading the index", e);
    }
}
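The BooleanQuery above combines a scoring MUST clause (the documents must be populated places) with a non-scoring FILTER clause (they must also fall inside the search radius), so the spatial restriction narrows the candidate set without contributing to relevance; FILTER clauses are also eligible for Lucene's query cache. A stripped-down sketch of that clause combination, with a hypothetical helper standing in for how PPL_QUERY and createSpatialQuery are combined:

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;

// Combine a scored "is a city" query with an unscored spatial filter.
static Query citiesInAreaQuery(Query cityQuery, Query spatialFilter) {
    return new BooleanQuery.Builder()
        .add(cityQuery, BooleanClause.Occur.MUST)        // must match; the clause is scored
        .add(spatialFilter, BooleanClause.Occur.FILTER)  // must match; contributes no score
        .build();
}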