Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
The class DocToDoubleVectorUtilsTest, method testDenseFreqDoubleArrayConversion:
@Test
public void testDenseFreqDoubleArrayConversion() throws Exception {
  IndexSearcher indexSearcher = new IndexSearcher(index);
  for (ScoreDoc scoreDoc : indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE).scoreDocs) {
    Terms docTerms = index.getTermVector(scoreDoc.doc, "text");
    Double[] vector = DocToDoubleVectorUtils.toDenseLocalFreqDoubleArray(docTerms);
    assertNotNull(vector);
    assertTrue(vector.length > 0);
  }
}
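These examples all share the same basic ScoreDoc pattern: each hit exposes a document id (scoreDoc.doc) and a relevance score (scoreDoc.score), and stored fields are fetched separately through the searcher. A minimal sketch of that pattern, assuming an existing IndexSearcher named searcher and an arbitrary Query named query (illustrative names, not taken from the projects on this page):

// Minimal ScoreDoc usage sketch.
TopDocs topDocs = searcher.search(query, 10);    // keep the 10 highest-scoring hits
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
  int docId = scoreDoc.doc;                      // index-wide document id of the hit
  float score = scoreDoc.score;                  // score assigned to the hit
  Document stored = searcher.doc(docId);         // load the hit's stored fields
  System.out.println(docId + " " + score + " " + stored.get("id"));
}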
Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
The class TestIndexingSequenceNumbers, method testStressConcurrentAddAndDeleteAndCommit:
@Slow
public void testStressConcurrentAddAndDeleteAndCommit() throws Exception {
  final int opCount = atLeast(10000);
  final int idCount = TestUtil.nextInt(random(), 10, 1000);
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
  // Cannot use RIW since it randomly commits:
  final IndexWriter w = new IndexWriter(dir, iwc);
  final int numThreads = TestUtil.nextInt(random(), 2, 5);
  Thread[] threads = new Thread[numThreads];
  //System.out.println("TEST: iter=" + iter + " opCount=" + opCount + " idCount=" + idCount + " threadCount=" + threads.length);
  final CountDownLatch startingGun = new CountDownLatch(1);
  List<List<Operation>> threadOps = new ArrayList<>();
  Object commitLock = new Object();
  final List<Operation> commits = new ArrayList<>();
  // multiple threads update the same set of documents, and we randomly commit
  for (int i = 0; i < threads.length; i++) {
    final List<Operation> ops = new ArrayList<>();
    threadOps.add(ops);
    final int threadID = i;
    threads[i] = new Thread() {
      @Override
      public void run() {
        try {
          startingGun.await();
          for (int i = 0; i < opCount; i++) {
            Operation op = new Operation();
            op.threadID = threadID;
            if (random().nextInt(500) == 17) {
              op.what = 2;
              synchronized (commitLock) {
                op.seqNo = w.commit();
                if (op.seqNo != -1) {
                  commits.add(op);
                }
              }
            } else {
              op.id = random().nextInt(idCount);
              Term idTerm = new Term("id", "" + op.id);
              if (random().nextInt(10) == 1) {
                op.what = 1;
                if (random().nextBoolean()) {
                  op.seqNo = w.deleteDocuments(idTerm);
                } else {
                  op.seqNo = w.deleteDocuments(new TermQuery(idTerm));
                }
              } else {
                Document doc = new Document();
                doc.add(new StoredField("threadop", threadID + "-" + ops.size()));
                doc.add(new StringField("id", "" + op.id, Field.Store.NO));
                if (random().nextBoolean()) {
                  List<Document> docs = new ArrayList<>();
                  docs.add(doc);
                  op.seqNo = w.addDocuments(docs);
                } else {
                  op.seqNo = w.addDocument(doc);
                }
                op.what = 3;
              }
              ops.add(op);
            }
          }
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }
    };
    threads[i].setName("thread" + threadID);
    threads[i].start();
  }
  startingGun.countDown();
  for (Thread thread : threads) {
    thread.join();
  }
  Operation commitOp = new Operation();
  commitOp.seqNo = w.commit();
  if (commitOp.seqNo != -1) {
    commits.add(commitOp);
  }
  List<IndexCommit> indexCommits = DirectoryReader.listCommits(dir);
  assertEquals(commits.size(), indexCommits.size());
  // how many docs with this id are expected:
  int[] expectedCounts = new int[idCount];
  long[] lastDelSeqNos = new long[idCount];
  //System.out.println("TEST: " + commits.size() + " commits");
  for (int i = 0; i < commits.size(); i++) {
    // this commit point should reflect all operations <= this seqNo
    long commitSeqNo = commits.get(i).seqNo;
    //System.out.println(" commit " + i + ": seqNo=" + commitSeqNo + " segs=" + indexCommits.get(i));
    // first find the highest seqNo of the last delete op, for each id, prior to this commit:
    Arrays.fill(lastDelSeqNos, -1);
    for (int threadID = 0; threadID < threadOps.size(); threadID++) {
      long lastSeqNo = 0;
      for (Operation op : threadOps.get(threadID)) {
        if (op.what == 1 && op.seqNo <= commitSeqNo && op.seqNo > lastDelSeqNos[op.id]) {
          lastDelSeqNos[op.id] = op.seqNo;
        }
        // within one thread the seqNos must only increase:
        assertTrue(op.seqNo > lastSeqNo);
        lastSeqNo = op.seqNo;
      }
    }
    // then count how many adds happened since the last delete and before this commit:
    Arrays.fill(expectedCounts, 0);
    for (int threadID = 0; threadID < threadOps.size(); threadID++) {
      for (Operation op : threadOps.get(threadID)) {
        if (op.what == 3 && op.seqNo <= commitSeqNo && op.seqNo > lastDelSeqNos[op.id]) {
          expectedCounts[op.id]++;
        }
      }
    }
    DirectoryReader r = DirectoryReader.open(indexCommits.get(i));
    IndexSearcher s = new IndexSearcher(r);
    for (int id = 0; id < idCount; id++) {
      //System.out.println("TEST: check id=" + id + " expectedThreadID=" + expectedThreadIDs[id]);
      int actualCount = s.count(new TermQuery(new Term("id", "" + id)));
      if (expectedCounts[id] != actualCount) {
        System.out.println("TEST: FAIL r=" + r + " id=" + id + " commitSeqNo=" + commitSeqNo);
        for (int threadID = 0; threadID < threadOps.size(); threadID++) {
          int opCount2 = 0;
          for (Operation op : threadOps.get(threadID)) {
            if (op.id == id) {
              boolean shouldCount = op.seqNo <= commitSeqNo && op.seqNo > lastDelSeqNos[op.id];
              System.out.println(" id=" + id + " what=" + op.what + " threadop=" + threadID + "-" + opCount2 + " seqNo=" + op.seqNo + " vs lastDelSeqNo=" + lastDelSeqNos[op.id] + " shouldCount=" + shouldCount);
            }
            opCount2++;
          }
        }
        TopDocs hits = s.search(new TermQuery(new Term("id", "" + id)), 1 + actualCount);
        for (ScoreDoc hit : hits.scoreDocs) {
          System.out.println(" hit: " + s.doc(hit.doc).get("threadop"));
        }
        for (LeafReaderContext ctx : r.leaves()) {
          System.out.println(" sub=" + ctx.reader());
          Bits liveDocs = ctx.reader().getLiveDocs();
          for (int docID = 0; docID < ctx.reader().maxDoc(); docID++) {
            System.out.println(" docID=" + docID + " threadop=" + ctx.reader().document(docID).get("threadop") + (liveDocs != null && liveDocs.get(docID) == false ? " (deleted)" : ""));
          }
        }
        assertEquals("commit " + i + " of " + commits.size() + " id=" + id + " reader=" + r, expectedCounts[id], actualCount);
      }
    }
    w.close();
    r.close();
  }
  dir.close();
}
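The stress test above relies on a small Operation holder class that is not reproduced here. Judging from the fields it reads and writes (threadID, id, seqNo, what), it is presumably something along these lines (a hypothetical reconstruction, not the actual Lucene source):

// Hypothetical sketch of the Operation holder assumed by the test above.
static class Operation {
  int threadID;  // thread that performed the operation
  int id;        // document id targeted by an add or delete
  long seqNo;    // sequence number returned by the IndexWriter call
  int what;      // 1 = delete, 2 = commit, 3 = add (as used in the test)
}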
Use of org.apache.lucene.search.ScoreDoc in project Anserini by castorini.
The class TweetSearcherAPI, method search:
@POST
@Path("search")
@Produces(MediaType.APPLICATION_JSON)
public List<SearchResult> search(SearchAPIQuery query) {
  try {
    Query q = new QueryParser(TweetStreamIndexer.StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(query.getQuery());
    try {
      reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
    if (newReader != null) {
      reader.close();
      reader = newReader;
    }
    IndexSearcher searcher = new IndexSearcher(reader);
    int topN = query.getCount();
    TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    List<SearchResult> resultHits = new ArrayList<>();
    for (int i = 0; i < hits.length && i < topN; ++i) {
      int docId = hits[i].doc;
      Document d = searcher.doc(docId);
      resultHits.add(new SearchResult(String.valueOf(d.get(TweetStreamIndexer.StatusField.ID.name))));
    }
    return resultHits;
  } catch (Exception e) {
    e.printStackTrace();
    return new ArrayList<>();
  }
}
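TopScoreDocCollector.create(topN) already caps the collected hits at topN, so the extra i < topN bound in the loop above is purely defensive. For a one-shot query like this, the collector can also be dropped in favor of IndexSearcher.search(Query, int); a minimal sketch using the same names as the method above (not the Anserini implementation):

// Equivalent retrieval without an explicit collector.
TopDocs topDocs = searcher.search(q, topN);   // topN bounds the result size
List<SearchResult> results = new ArrayList<>();
for (ScoreDoc hit : topDocs.scoreDocs) {
  Document d = searcher.doc(hit.doc);         // stored fields for this hit
  results.add(new SearchResult(String.valueOf(d.get(TweetStreamIndexer.StatusField.ID.name))));
}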
Use of org.apache.lucene.search.ScoreDoc in project Anserini by castorini.
The class TweetServlet, method doGet:
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
  if (request.getRequestURI().equals("/search")) {
    response.setStatus(HttpServletResponse.SC_OK);
    response.setContentType("text/html");
    request.setCharacterEncoding("UTF-8");
    Query q;
    try {
      q = new QueryParser(StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(request.getParameter("query"));
      try {
        reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
      IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
      if (newReader != null) {
        reader.close();
        reader = newReader;
      }
      IndexSearcher searcher = new IndexSearcher(reader);
      int topN;
      if (request.getParameter("top") != null) {
        topN = Integer.parseInt(request.getParameter("top"));
      } else {
        // TODO configurable, default(parameter unspecified in url) topN = 20
        topN = 20;
      }
      TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
      searcher.search(q, collector);
      ScoreDoc[] hits = collector.topDocs().scoreDocs;
      TweetHits tweetHits = new TweetHits(request.getParameter("query"), hits.length);
      for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        tweetHits.addHit(i, String.valueOf(d.get(StatusField.ID.name)));
      }
      MustacheFactory mf = new DefaultMustacheFactory();
      Mustache mustache = mf.compile(MustacheTemplatePath);
      mustache.execute(response.getWriter(), tweetHits).flush();
    } catch (ParseException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  } else {
    response.setStatus(HttpServletResponse.SC_NOT_FOUND);
  }
}
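Both Anserini snippets reopen the reader by hand on every request. Lucene's SearcherManager can take over this refresh-and-swap bookkeeping, including reference counting, for near-real-time search. A minimal sketch, assuming access to the same TweetSearcher.indexWriter (constructor arguments vary slightly across Lucene versions, so treat this as an outline rather than the project's code):

// Created once, shared across requests.
SearcherManager searcherManager =
    new SearcherManager(TweetSearcher.indexWriter, true, new SearcherFactory());

// Per request:
searcherManager.maybeRefresh();                      // pick up any changes from the writer
IndexSearcher searcher = searcherManager.acquire();  // reference-counted searcher
try {
  TopDocs topDocs = searcher.search(q, topN);
  // ... iterate topDocs.scoreDocs as in the methods above ...
} finally {
  searcherManager.release(searcher);                 // always release what was acquired
}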
Use of org.apache.lucene.search.ScoreDoc in project ddf by codice.
The class GeoNamesQueryLuceneIndex, method doGetNearestCities:
protected List<NearbyLocation> doGetNearestCities(final Shape shape, final int radiusInKm, final int maxResults, final Directory directory) throws GeoEntryQueryException {
  notNull(shape, "GeoNamesQueryLuceneIndex.doGetNearestCities(): argument 'shape' may not be null.");
  if (radiusInKm <= 0) {
    throw new IllegalArgumentException("GeoNamesQueryLuceneIndex.doGetNearestCities(): radiusInKm must be positive.");
  }
  if (maxResults <= 0) {
    throw new IllegalArgumentException("GeoNamesQueryLuceneIndex.doGetNearestCities(): maxResults must be positive.");
  }
  if (directory == null) {
    return Collections.emptyList();
  }
  try (final IndexReader indexReader = createIndexReader(directory)) {
    final IndexSearcher indexSearcher = createIndexSearcher(indexReader);
    final List<NearbyLocation> closestCities = new ArrayList<>();
    final Point center = shape.getCenter();
    final Query filter = createSpatialQuery(center, radiusInKm);
    // Query for all the documents in the index that are cities, then filter those
    // results for the ones that are in the search area.
    final BooleanQuery booleanQuery = new BooleanQuery.Builder().add(PPL_QUERY, BooleanClause.Occur.MUST).add(filter, BooleanClause.Occur.FILTER).build();
    final TopDocs topDocs = indexSearcher.search(booleanQuery, maxResults, SORT);
    if (topDocs.totalHits > 0) {
      for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        final double lat = Double.parseDouble(indexSearcher.doc(scoreDoc.doc).get(GeoNamesLuceneConstants.LATITUDE_FIELD));
        final double lon = Double.parseDouble(indexSearcher.doc(scoreDoc.doc).get(GeoNamesLuceneConstants.LONGITUDE_FIELD));
        final String name = indexSearcher.doc(scoreDoc.doc).get(GeoNamesLuceneConstants.NAME_FIELD);
        final NearbyLocation city = new NearbyLocationImpl(center, new PointImpl(lon, lat, SPATIAL_CONTEXT), name);
        closestCities.add(city);
      }
    }
    return closestCities;
  } catch (IOException e) {
    throw new GeoEntryQueryException("Error reading the index", e);
  }
}
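Each pass through the loop above loads the stored document three times, once per field. Fetching the document once per ScoreDoc avoids the repeated stored-field lookups; a small sketch of the reworked loop body, using the same constants (not the ddf source as committed):

// Load the stored document once per hit, then read its fields.
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
  final Document doc = indexSearcher.doc(scoreDoc.doc);
  final double lat = Double.parseDouble(doc.get(GeoNamesLuceneConstants.LATITUDE_FIELD));
  final double lon = Double.parseDouble(doc.get(GeoNamesLuceneConstants.LONGITUDE_FIELD));
  final String name = doc.get(GeoNamesLuceneConstants.NAME_FIELD);
  closestCities.add(new NearbyLocationImpl(center, new PointImpl(lon, lat, SPATIAL_CONTEXT), name));
}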