Usage example of org.apache.lucene.index.DirectoryReader from the Apache lucene-solr project: class TestDocValuesStatsCollector, method testOneDoc.
/**
 * Indexes a single document carrying one numeric doc-values field and checks that
 * every statistic collected by {@link DocValuesStatsCollector} collapses to the
 * trivial single-value answer (count 1, no missing docs, min == max == sum == 1,
 * zero variance and standard deviation).
 */
public void testOneDoc() throws IOException {
  try (Directory dir = newDirectory();
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig())) {
    String field = "numeric";
    Document document = new Document();
    document.add(new NumericDocValuesField(field, 1));
    document.add(new StringField("id", "doc1", Store.NO));
    writer.addDocument(document);
    // Open a near-real-time reader on the writer so the pending doc is visible.
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
      LongDocValuesStats stats = new LongDocValuesStats(field);
      new IndexSearcher(reader).search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
      // Exactly one document, with value 1, contributed to the stats.
      assertEquals(1, stats.count());
      assertEquals(0, stats.missing());
      assertEquals(1, stats.min().longValue());
      assertEquals(1, stats.max().longValue());
      assertEquals(1, stats.sum().longValue());
      assertEquals(1, stats.mean(), 0.0001);
      // A single sample has no spread.
      assertEquals(0, stats.variance(), 0.0001);
      assertEquals(0, stats.stdev(), 0.0001);
    }
  }
}
Usage example of org.apache.lucene.index.DirectoryReader from the Apache lucene-solr project: class TestDocValuesStatsCollector, method testDocsWithDoubleValues.
/**
 * Indexes a random number of documents where only some carry a double doc-values
 * field (values 1.0, 2.0, ...), optionally deletes a random subset, then verifies
 * that {@link DoubleDocValuesStats} reports the expected count, missing count,
 * min/max, mean, sum, variance and standard deviation.
 *
 * <p>A zero in {@code docValues[i]} marks a document that either never had a value
 * or was deleted; positive entries are the live values the stats must reflect.
 */
public void testDocsWithDoubleValues() throws IOException {
  try (Directory dir = newDirectory();
      IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
    String field = "numeric";
    int numDocs = TestUtil.nextInt(random(), 1, 100);
    double[] docValues = new double[numDocs];
    double nextVal = 1.0;
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      if (random().nextBoolean()) {
        // not all documents have a value
        doc.add(new DoubleDocValuesField(field, nextVal));
        doc.add(new StringField("id", "doc" + i, Store.NO));
        docValues[i] = nextVal;
        ++nextVal;
      }
      indexWriter.addDocument(doc);
    }
    // 20% of cases delete some docs
    if (random().nextDouble() < 0.2) {
      for (int i = 0; i < numDocs; i++) {
        if (random().nextBoolean()) {
          indexWriter.deleteDocuments(new Term("id", "doc" + i));
          docValues[i] = 0; // deleted docs must not contribute to the stats
        }
      }
    }
    try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      DoubleDocValuesStats stats = new DoubleDocValuesStats(field);
      searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
      // Live docs with a value are exactly the positive entries.
      int expCount = (int) Arrays.stream(docValues).filter(v -> v > 0).count();
      assertEquals(expCount, stats.count());
      int numDocsWithoutField = (int) getZeroValues(docValues).count();
      assertEquals(computeExpMissing(numDocsWithoutField, numDocs, reader), stats.missing());
      if (stats.count() > 0) {
        DoubleSummaryStatistics sumStats = getPositiveValues(docValues).summaryStatistics();
        assertEquals(sumStats.getMax(), stats.max().doubleValue(), 0.00001);
        assertEquals(sumStats.getMin(), stats.min().doubleValue(), 0.00001);
        assertEquals(sumStats.getAverage(), stats.mean(), 0.00001);
        assertEquals(sumStats.getSum(), stats.sum(), 0.00001);
        // Use the mean() accessor, consistent with the assertions above,
        // rather than reaching into the package-visible field.
        double variance = computeVariance(docValues, stats.mean(), stats.count());
        assertEquals(variance, stats.variance(), 0.00001);
        assertEquals(Math.sqrt(variance), stats.stdev(), 0.00001);
      }
    }
  }
}
Usage example of org.apache.lucene.index.DirectoryReader from the Apache lucene-solr project: class TestDocValuesStatsCollector, method testDocsWithLongValues.
/**
 * Indexes a random number of documents where only some carry a long doc-values
 * field (values 1, 2, ...), optionally deletes a random subset, then verifies that
 * {@link LongDocValuesStats} reports the expected count, missing count, min/max,
 * mean, sum, variance and standard deviation.
 *
 * <p>A zero in {@code docValues[i]} marks a document that either never had a value
 * or was deleted; positive entries are the live values the stats must reflect.
 */
public void testDocsWithLongValues() throws IOException {
  try (Directory dir = newDirectory();
      IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
    String field = "numeric";
    int numDocs = TestUtil.nextInt(random(), 1, 100);
    long[] docValues = new long[numDocs];
    int nextVal = 1;
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      if (random().nextBoolean()) {
        // not all documents have a value
        doc.add(new NumericDocValuesField(field, nextVal));
        doc.add(new StringField("id", "doc" + i, Store.NO));
        docValues[i] = nextVal;
        ++nextVal;
      }
      indexWriter.addDocument(doc);
    }
    // 20% of cases delete some docs
    if (random().nextDouble() < 0.2) {
      for (int i = 0; i < numDocs; i++) {
        if (random().nextBoolean()) {
          indexWriter.deleteDocuments(new Term("id", "doc" + i));
          docValues[i] = 0; // deleted docs must not contribute to the stats
        }
      }
    }
    try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      LongDocValuesStats stats = new LongDocValuesStats(field);
      searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
      // Live docs with a value are exactly the positive entries.
      int expCount = (int) Arrays.stream(docValues).filter(v -> v > 0).count();
      assertEquals(expCount, stats.count());
      int numDocsWithoutField = (int) getZeroValues(docValues).count();
      assertEquals(computeExpMissing(numDocsWithoutField, numDocs, reader), stats.missing());
      if (stats.count() > 0) {
        LongSummaryStatistics sumStats = getPositiveValues(docValues).summaryStatistics();
        assertEquals(sumStats.getMax(), stats.max().longValue());
        assertEquals(sumStats.getMin(), stats.min().longValue());
        assertEquals(sumStats.getAverage(), stats.mean(), 0.00001);
        assertEquals(sumStats.getSum(), stats.sum().longValue());
        // Use the mean() accessor, consistent with the assertions above,
        // rather than reaching into the package-visible field.
        double variance = computeVariance(docValues, stats.mean(), stats.count());
        assertEquals(variance, stats.variance(), 0.00001);
        assertEquals(Math.sqrt(variance), stats.stdev(), 0.00001);
      }
    }
  }
}
Usage example of org.apache.lucene.index.DirectoryReader from the Apache lucene-solr project: class SweetSpotSimilarityTest, method computeNorm.
/**
 * Indexes a single document whose {@code field} contains {@code length} repetitions
 * of the term "a" under the given similarity, runs an {@code explain} for a
 * TermQuery on that term, and returns the value of the nested "fieldNorm"
 * explanation.
 *
 * <p>All index resources are managed with try-with-resources so that an exception
 * from {@code explain} (or any earlier step) cannot leak the writer, reader, or
 * directory — the original manual close sequence leaked all three on failure.
 *
 * @param sim    similarity used both at index time and at search time
 * @param field  name of the indexed text field
 * @param length number of "a" tokens to index (the field length being normed)
 * @return the "fieldNorm" value from the explanation tree
 * @throws IOException on index I/O failure
 */
private static float computeNorm(Similarity sim, String field, int length) throws IOException {
  String value = IntStream.range(0, length).mapToObj(i -> "a").collect(Collectors.joining(" "));
  try (Directory dir = new RAMDirectory();
      IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim))) {
    w.addDocument(Collections.singleton(newTextField(field, value, Store.NO)));
    // Near-real-time reader over the writer; both are closed by try-with-resources.
    try (DirectoryReader reader = DirectoryReader.open(w)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      searcher.setSimilarity(sim);
      Explanation expl = searcher.explain(new TermQuery(new Term(field, "a")), 0);
      Explanation norm = findExplanation(expl, "fieldNorm");
      assertNotNull(norm);
      return norm.getValue();
    }
  }
}
Usage example of org.apache.lucene.index.DirectoryReader from the Apache lucene-solr project: class SimplePrimaryNode, method verifyAtLeastMarkerCount.
// Verifies that the current searcher sees at least expectedAtLeastCount "marker"
// documents. On success, optionally writes the reader version and hit count to
// out; on shortfall, logs which marker docids were seen and throws
// IllegalStateException. The searcher is always released back to the manager.
//
// Parameters:
//   expectedAtLeastCount - minimum number of marker hits required
//   out - destination for (version, hitCount) on success; may be null to skip
// Throws:
//   IllegalStateException - if fewer than expectedAtLeastCount markers are found
//   IOException - on search or write failure
private void verifyAtLeastMarkerCount(int expectedAtLeastCount, DataOutput out) throws IOException {
// Acquire a searcher from the manager; must be released in the finally below.
IndexSearcher searcher = mgr.acquire();
try {
// Reader version identifies which index snapshot this count came from.
long version = ((DirectoryReader) searcher.getIndexReader()).getVersion();
int hitCount = searcher.count(new TermQuery(new Term("marker", "marker")));
if (hitCount < expectedAtLeastCount) {
// Shortfall: re-run as a TopDocs search so we can report which markers ARE visible.
message("marker search: expectedAtLeastCount=" + expectedAtLeastCount + " but hitCount=" + hitCount);
TopDocs hits = searcher.search(new TermQuery(new Term("marker", "marker")), expectedAtLeastCount);
List<Integer> seen = new ArrayList<>();
for (ScoreDoc hit : hits.scoreDocs) {
Document doc = searcher.doc(hit.doc);
// docid values look like "m<number>" — substring(1) strips the leading letter.
// NOTE(review): assumes the first char is always a single-letter prefix; confirm against the writer side.
seen.add(Integer.parseInt(doc.get("docid").substring(1)));
}
// Sort so the log output lists markers in ascending order.
Collections.sort(seen);
message("saw markers:");
for (int marker : seen) {
message("saw m" + marker);
}
throw new IllegalStateException("at flush: marker count " + hitCount + " but expected at least " + expectedAtLeastCount + " version=" + version);
}
if (out != null) {
// Report the snapshot version and observed count back to the caller.
out.writeVLong(version);
out.writeVInt(hitCount);
}
} finally {
// Always return the searcher so the manager can close old readers.
mgr.release(searcher);
}
}
Aggregations