Use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by apache.
In class CachingNaiveBayesClassifier, the method getWordFreqForClassess:
private Map<BytesRef, Integer> getWordFreqForClassess(String word) throws IOException {
    Map<BytesRef, Integer> insertPoint;
    insertPoint = termCClassHitCache.get(word);
    // if we get the answer from the cache
    if (insertPoint != null) {
        if (!insertPoint.isEmpty()) {
            return insertPoint;
        }
    }
    Map<BytesRef, Integer> searched = new ConcurrentHashMap<>();
    // if we don't get the answer, but it's relevant, we must search for it and insert it into the cache
    if (insertPoint != null || !justCachedTerms) {
        for (BytesRef cclass : cclasses) {
            BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
            BooleanQuery.Builder subQuery = new BooleanQuery.Builder();
            for (String textFieldName : textFieldNames) {
                subQuery.add(new BooleanClause(new TermQuery(new Term(textFieldName, word)), BooleanClause.Occur.SHOULD));
            }
            booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST));
            booleanQuery.add(new BooleanClause(new TermQuery(new Term(classFieldName, cclass)), BooleanClause.Occur.MUST));
            if (query != null) {
                booleanQuery.add(query, BooleanClause.Occur.MUST);
            }
            TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
            indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
            int ret = totalHitCountCollector.getTotalHits();
            if (ret != 0) {
                searched.put(cclass, ret);
            }
        }
        if (insertPoint != null) {
            // thread-safe and concurrent write into the cache
            termCClassHitCache.put(word, searched);
        }
    }
    return searched;
}
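Every usage on this page follows the same core idiom: create a TotalHitCountCollector, pass it to IndexSearcher.search(Query, Collector), and read getTotalHits(). The collector only tallies matching documents, so nothing is scored or ranked, which makes it cheaper than collecting top hits when only a count is needed. Below is a minimal self-contained sketch of the pattern, assuming a Lucene 8.x classpath; the field name "body" and the indexed text are illustrative, not taken from any of the projects on this page.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class TotalHitCountExample {
    public static void main(String[] args) throws Exception {
        Directory dir = new ByteBuffersDirectory();
        // Index a single document with one analyzed text field.
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("body", "count hits with a collector", Field.Store.NO));
            writer.addDocument(doc);
        }
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // The collector only tallies matches; no documents or scores are materialized.
            TotalHitCountCollector collector = new TotalHitCountCollector();
            searcher.search(new TermQuery(new Term("body", "collector")), collector);
            System.out.println("total hits: " + collector.getTotalHits()); // prints 1
        }
    }
}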
Use of org.apache.lucene.search.TotalHitCountCollector in project querydsl by querydsl.
In class AbstractLuceneQuery, the method innerCount:
private long innerCount() {
    try {
        final int maxDoc = searcher.maxDoc();
        if (maxDoc == 0) {
            return 0;
        }
        TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(createQuery(), getFilter(), collector);
        return collector.getTotalHits();
    } catch (IOException | IllegalArgumentException e) {
        throw new QueryException(e);
    }
}
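Note that searcher.search(createQuery(), getFilter(), collector) uses the Filter-based overload that existed through Lucene 5.x; org.apache.lucene.search.Filter was removed in Lucene 6, where filters are instead expressed as non-scoring FILTER clauses of a BooleanQuery. A hedged sketch of how the same count could be written on a modern classpath follows; the class and method names here are illustrative, not querydsl's.

import java.io.IOException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TotalHitCountCollector;

final class FilteredCount {
    // Fold the main query and the (possibly null) filter into one BooleanQuery and count its hits.
    static long count(IndexSearcher searcher, Query query, Query filter) throws IOException {
        if (searcher.getIndexReader().maxDoc() == 0) {
            return 0; // empty index: nothing can match
        }
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(query, BooleanClause.Occur.MUST);
        if (filter != null) {
            builder.add(filter, BooleanClause.Occur.FILTER); // matches but contributes no score
        }
        TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(builder.build(), collector);
        return collector.getTotalHits();
    }
}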
Use of org.apache.lucene.search.TotalHitCountCollector in project neo4j by neo4j.
In class FulltextIndexReader, the method countIndexedEntities:
/**
 * When matching entities in the fulltext index there are some special cases that make it hard to check that entities
 * actually have the expected property values. To match, we use the entityId and only take entries that don't contain any
 * unexpected properties. But we don't check that expected properties are present; see
 * {@link LuceneFulltextDocumentStructure#newCountEntityEntriesQuery} for more details.
 */
@Override
public long countIndexedEntities(long entityId, CursorContext cursorContext, int[] propertyKeyIds, Value... propertyValues) {
    long count = 0;
    for (SearcherReference searcher : searchers) {
        try {
            String[] propertyKeys = new String[propertyKeyIds.length];
            for (int i = 0; i < propertyKeyIds.length; i++) {
                propertyKeys[i] = getPropertyKeyName(propertyKeyIds[i]);
            }
            Query query = LuceneFulltextDocumentStructure.newCountEntityEntriesQuery(entityId, propertyKeys, propertyValues);
            TotalHitCountCollector collector = new TotalHitCountCollector();
            searcher.getIndexSearcher().search(query, collector);
            count += collector.getTotalHits();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    return count;
}
Use of org.apache.lucene.search.TotalHitCountCollector in project crate by crate.
In class InternalEngineTests, the method testTranslogMultipleOperationsSameDocument:
@Test
public void testTranslogMultipleOperationsSameDocument() throws IOException {
    final int ops = randomIntBetween(1, 32);
    Engine initialEngine;
    final List<Engine.Operation> operations = new ArrayList<>();
    try {
        initialEngine = engine;
        for (int i = 0; i < ops; i++) {
            final ParsedDocument doc = testParsedDocument("1", null, testDocumentWithTextField(), SOURCE, null);
            if (randomBoolean()) {
                final Engine.Index operation = new Engine.Index(newUid(doc), doc, UNASSIGNED_SEQ_NO, 0, i, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime(), -1, false, UNASSIGNED_SEQ_NO, 0);
                operations.add(operation);
                initialEngine.index(operation);
            } else {
                final Engine.Delete operation = new Engine.Delete("1", newUid(doc), UNASSIGNED_SEQ_NO, 0, i, VersionType.EXTERNAL, Engine.Operation.Origin.PRIMARY, System.nanoTime(), UNASSIGNED_SEQ_NO, 0);
                operations.add(operation);
                initialEngine.delete(operation);
            }
        }
    } finally {
        IOUtils.close(engine);
    }
    // Recover a fresh engine from the translog and check the final visible state.
    try (Engine recoveringEngine = new InternalEngine(engine.config())) {
        recoveringEngine.recoverFromTranslog(translogHandler, Long.MAX_VALUE);
        recoveringEngine.refresh("test");
        try (Engine.Searcher searcher = recoveringEngine.acquireSearcher("test")) {
            final TotalHitCountCollector collector = new TotalHitCountCollector();
            searcher.search(new MatchAllDocsQuery(), collector);
            // The document is visible only if the last replayed operation was not a delete.
            assertThat(collector.getTotalHits(), equalTo(operations.get(operations.size() - 1) instanceof Engine.Delete ? 0 : 1));
        }
    }
}
Use of org.apache.lucene.search.TotalHitCountCollector in project crate by crate.
In class InternalEngineTests, the method testConcurrentOutOfOrderDocsOnReplica:
@Test
public void testConcurrentOutOfOrderDocsOnReplica() throws IOException, InterruptedException {
    final List<Engine.Operation> opsDoc1 = generateSingleDocHistory(true, randomFrom(VersionType.INTERNAL, VersionType.EXTERNAL), false, 2, 100, 300, "1");
    final Engine.Operation lastOpDoc1 = opsDoc1.get(opsDoc1.size() - 1);
    final String lastFieldValueDoc1;
    if (lastOpDoc1 instanceof Engine.Index) {
        Engine.Index index = (Engine.Index) lastOpDoc1;
        lastFieldValueDoc1 = index.docs().get(0).get("value");
    } else {
        // delete
        lastFieldValueDoc1 = null;
    }
    final List<Engine.Operation> opsDoc2 = generateSingleDocHistory(true, randomFrom(VersionType.INTERNAL, VersionType.EXTERNAL), false, 2, 100, 300, "2");
    final Engine.Operation lastOpDoc2 = opsDoc2.get(opsDoc2.size() - 1);
    final String lastFieldValueDoc2;
    if (lastOpDoc2 instanceof Engine.Index) {
        Engine.Index index = (Engine.Index) lastOpDoc2;
        lastFieldValueDoc2 = index.docs().get(0).get("value");
    } else {
        // delete
        lastFieldValueDoc2 = null;
    }
    // randomly interleave the two documents' histories, assigning fresh sequence numbers
    final AtomicLong seqNoGenerator = new AtomicLong();
    BiFunction<Engine.Operation, Long, Engine.Operation> seqNoUpdater = (operation, newSeqNo) -> {
        if (operation instanceof Engine.Index) {
            Engine.Index index = (Engine.Index) operation;
            Document doc = testDocumentWithTextField(index.docs().get(0).get("value"));
            ParsedDocument parsedDocument = testParsedDocument(index.id(), index.routing(), doc, index.source(), null);
            return new Engine.Index(index.uid(), parsedDocument, newSeqNo, index.primaryTerm(), index.version(), index.versionType(), index.origin(), index.startTime(), index.getAutoGeneratedIdTimestamp(), index.isRetry(), UNASSIGNED_SEQ_NO, 0);
        } else {
            Engine.Delete delete = (Engine.Delete) operation;
            return new Engine.Delete(delete.id(), delete.uid(), newSeqNo, delete.primaryTerm(), delete.version(), delete.versionType(), delete.origin(), delete.startTime(), UNASSIGNED_SEQ_NO, 0);
        }
    };
    final List<Engine.Operation> allOps = new ArrayList<>();
    Iterator<Engine.Operation> iter1 = opsDoc1.iterator();
    Iterator<Engine.Operation> iter2 = opsDoc2.iterator();
    while (iter1.hasNext() && iter2.hasNext()) {
        final Engine.Operation next = randomBoolean() ? iter1.next() : iter2.next();
        allOps.add(seqNoUpdater.apply(next, seqNoGenerator.getAndIncrement()));
    }
    iter1.forEachRemaining(o -> allOps.add(seqNoUpdater.apply(o, seqNoGenerator.getAndIncrement())));
    iter2.forEachRemaining(o -> allOps.add(seqNoUpdater.apply(o, seqNoGenerator.getAndIncrement())));
    // insert some duplicates
    randomSubsetOf(allOps).forEach(op -> allOps.add(seqNoUpdater.apply(op, op.seqNo())));
    shuffle(allOps, random());
    concurrentlyApplyOps(allOps, engine);
    engine.refresh("test");
    if (lastFieldValueDoc1 != null) {
        try (Searcher searcher = engine.acquireSearcher("test")) {
            final TotalHitCountCollector collector = new TotalHitCountCollector();
            searcher.search(new TermQuery(new Term("value", lastFieldValueDoc1)), collector);
            assertThat(collector.getTotalHits(), equalTo(1));
        }
    }
    if (lastFieldValueDoc2 != null) {
        try (Searcher searcher = engine.acquireSearcher("test")) {
            final TotalHitCountCollector collector = new TotalHitCountCollector();
            searcher.search(new TermQuery(new Term("value", lastFieldValueDoc2)), collector);
            assertThat(collector.getTotalHits(), equalTo(1));
        }
    }
    int totalExpectedOps = 0;
    if (lastFieldValueDoc1 != null) {
        totalExpectedOps++;
    }
    if (lastFieldValueDoc2 != null) {
        totalExpectedOps++;
    }
    assertVisibleCount(engine, totalExpectedOps);
}
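The two search-and-assert blocks above are identical apart from the term value, so in the test's own style they could be folded into a small helper. A sketch, assuming the test class's existing imports and that Engine.Searcher exposes IndexSearcher's search method (as the test itself relies on); the helper name assertHitCount is hypothetical, not part of InternalEngineTests:

// Hypothetical helper: count hits for a query on a freshly acquired searcher
// and assert the expected total, mirroring the repeated blocks in the test above.
private static void assertHitCount(Engine engine, Query query, int expectedHits) throws IOException {
    try (Engine.Searcher searcher = engine.acquireSearcher("test")) {
        final TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(query, collector);
        assertThat(collector.getTotalHits(), equalTo(expectedHits));
    }
}

With it, each block reduces to a single call such as assertHitCount(engine, new TermQuery(new Term("value", lastFieldValueDoc1)), 1).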