Use of org.apache.lucene.document.DoubleDocValuesField in the lucene-solr project by Apache.
Class TestDoubleValuesSource, method setUp.
/**
 * Builds the index shared by the tests in this class.
 *
 * <p>Between 2049 and 4000 documents are written. Each one carries two text fields
 * ("english", "oddeven") and random "int", "long", "float" and "double" doc values.
 * Only the document at position 545 additionally gets the "onefield" doc value (45.72).
 * The resulting reader and searcher are stored in the {@code reader} and {@code searcher}
 * fields.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  final int docCount = TestUtil.nextInt(random(), 2049, 4000);
  for (int docIndex = 0; docIndex < docCount; docIndex++) {
    final String parity;
    if (docIndex % 2 == 0) {
      parity = "even";
    } else {
      parity = "odd";
    }

    Document doc = new Document();
    doc.add(newTextField("english", English.intToEnglish(docIndex), Field.Store.NO));
    doc.add(newTextField("oddeven", parity, Field.Store.NO));
    doc.add(new NumericDocValuesField("int", random().nextInt()));
    doc.add(new NumericDocValuesField("long", random().nextLong()));
    doc.add(new FloatDocValuesField("float", random().nextFloat()));
    doc.add(new DoubleDocValuesField("double", random().nextDouble()));
    // Exactly one document in the index has a value for "onefield".
    if (docIndex == 545) {
      doc.add(new DoubleDocValuesField("onefield", 45.72));
    }
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
}
Use of org.apache.lucene.document.DoubleDocValuesField in the lucene-solr project by Apache.
Class TestIndexSorting, method testMissingDoubleFirst.
/**
 * Index-sorts on the double field "foo" with the missing value set to
 * {@link Double#NEGATIVE_INFINITY}, then checks that after merging to a single segment
 * the two documents that have a value sit at doc ids 1 and 2, in ascending value order
 * (7.0, then 18.0).
 */
public void testMissingDoubleFirst() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField fooSort = new SortField("foo", SortField.Type.DOUBLE);
  fooSort.setMissingValue(Double.NEGATIVE_INFINITY);
  config.setIndexSort(new Sort(fooSort));
  IndexWriter writer = new IndexWriter(directory, config);

  Document eighteen = new Document();
  eighteen.add(new DoubleDocValuesField("foo", 18.0));
  writer.addDocument(eighteen);
  // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
  writer.commit();

  // missing
  writer.addDocument(new Document());
  writer.commit();

  Document seven = new Document();
  seven.add(new DoubleDocValuesField("foo", 7.0));
  writer.addDocument(seven);
  writer.forceMerge(1);

  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReader onlyLeaf = getOnlyLeafReader(reader);
  assertEquals(3, onlyLeaf.maxDoc());

  // The first document with a value is doc 1, so doc 0 has no value for "foo".
  NumericDocValues fooValues = onlyLeaf.getNumericDocValues("foo");
  assertEquals(1, fooValues.nextDoc());
  assertEquals(7.0, Double.longBitsToDouble(fooValues.longValue()), 0.0);
  assertEquals(2, fooValues.nextDoc());
  assertEquals(18.0, Double.longBitsToDouble(fooValues.longValue()), 0.0);

  reader.close();
  writer.close();
  directory.close();
}
Use of org.apache.lucene.document.DoubleDocValuesField in the lucene-solr project by Apache.
Class TestIndexSorting, method testRandom3.
/**
 * Pits index-time sorting against query-time sorting.
 *
 * <p>The same random documents are written to two indexes: one without an index sort and
 * one whose index sort is the randomly chosen {@code sort}. A random subset of the
 * documents is then deleted from both. For 100 iterations a match-all search sorted by
 * {@code sort} is run against each index (the sorted index through an
 * {@link EarlyTerminatingSortingCollector}), and the returned hits must agree on stored
 * "id" and on sort field values.
 */
public void testRandom3() throws Exception {
  final int numDocs = TEST_NIGHTLY ? atLeast(100000) : atLeast(1000);
  List<RandomDoc> docs = new ArrayList<>();
  Sort sort = randomSort();
  if (VERBOSE) {
    System.out.println("TEST: numDocs=" + numDocs + " use sort=" + sort);
  }

  // no index sorting, all search-time sorting:
  Directory plainDir = newFSDirectory(createTempDir());
  IndexWriterConfig plainConfig = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter plainWriter = new IndexWriter(plainDir, plainConfig);

  // use index sorting:
  Directory sortedDir = newFSDirectory(createTempDir());
  IndexWriterConfig sortedConfig = newIndexWriterConfig(new MockAnalyzer(random()));
  sortedConfig.setIndexSort(sort);
  IndexWriter sortedWriter = new IndexWriter(sortedDir, sortedConfig);

  Set<Integer> toDelete = new HashSet<>();
  double deleteChance = random().nextDouble();
  for (int id = 0; id < numDocs; id++) {
    RandomDoc randomDoc = new RandomDoc(id);
    docs.add(randomDoc);
    if (VERBOSE) {
      System.out.println("TEST: doc id=" + id);
      System.out.println(" int=" + randomDoc.intValue);
      System.out.println(" long=" + randomDoc.longValue);
      System.out.println(" float=" + randomDoc.floatValue);
      System.out.println(" double=" + randomDoc.doubleValue);
      System.out.println(" bytes=" + new BytesRef(randomDoc.bytesValue));
    }

    // Single-valued fields.
    Document luceneDoc = new Document();
    luceneDoc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
    luceneDoc.add(new NumericDocValuesField("id", id));
    luceneDoc.add(new NumericDocValuesField("int", randomDoc.intValue));
    luceneDoc.add(new NumericDocValuesField("long", randomDoc.longValue));
    luceneDoc.add(new DoubleDocValuesField("double", randomDoc.doubleValue));
    luceneDoc.add(new FloatDocValuesField("float", randomDoc.floatValue));
    luceneDoc.add(new SortedDocValuesField("bytes", new BytesRef(randomDoc.bytesValue)));

    // Multi-valued fields; floats and doubles are stored in their sortable integer encodings.
    for (int multiInt : randomDoc.intValues) {
      luceneDoc.add(new SortedNumericDocValuesField("multi_valued_int", multiInt));
    }
    for (long multiLong : randomDoc.longValues) {
      luceneDoc.add(new SortedNumericDocValuesField("multi_valued_long", multiLong));
    }
    for (float multiFloat : randomDoc.floatValues) {
      int sortableInt = NumericUtils.floatToSortableInt(multiFloat);
      luceneDoc.add(new SortedNumericDocValuesField("multi_valued_float", sortableInt));
    }
    for (double multiDouble : randomDoc.doubleValues) {
      long sortableLong = NumericUtils.doubleToSortableLong(multiDouble);
      luceneDoc.add(new SortedNumericDocValuesField("multi_valued_double", sortableLong));
    }
    for (byte[] multiBytes : randomDoc.bytesValues) {
      luceneDoc.add(new SortedSetDocValuesField("multi_valued_bytes", new BytesRef(multiBytes)));
    }

    plainWriter.addDocument(luceneDoc);
    sortedWriter.addDocument(luceneDoc);

    if (random().nextDouble() < deleteChance) {
      toDelete.add(id);
    }
  }

  // Apply the same deletions to both indexes.
  for (int id : toDelete) {
    String idText = Integer.toString(id);
    plainWriter.deleteDocuments(new Term("id", idText));
    sortedWriter.deleteDocuments(new Term("id", idText));
  }

  DirectoryReader plainReader = DirectoryReader.open(plainWriter);
  IndexSearcher plainSearcher = newSearcher(plainReader);

  if (random().nextBoolean()) {
    int maxSegmentCount = TestUtil.nextInt(random(), 1, 5);
    if (VERBOSE) {
      System.out.println("TEST: now forceMerge(" + maxSegmentCount + ")");
    }
    sortedWriter.forceMerge(maxSegmentCount);
  }

  DirectoryReader sortedReader = DirectoryReader.open(sortedWriter);
  IndexSearcher sortedSearcher = newSearcher(sortedReader);

  for (int iter = 0; iter < 100; iter++) {
    int numHits = TestUtil.nextInt(random(), 1, numDocs);
    if (VERBOSE) {
      System.out.println("TEST: iter=" + iter + " numHits=" + numHits);
    }

    TopFieldCollector queryTimeCollector = TopFieldCollector.create(sort, numHits, true, true, true);
    plainSearcher.search(new MatchAllDocsQuery(), queryTimeCollector);
    TopDocs queryTimeHits = queryTimeCollector.topDocs();

    TopFieldCollector indexTimeCollector = TopFieldCollector.create(sort, numHits, true, true, true);
    EarlyTerminatingSortingCollector earlyTerminating =
        new EarlyTerminatingSortingCollector(indexTimeCollector, sort, numHits);
    sortedSearcher.search(new MatchAllDocsQuery(), earlyTerminating);
    TopDocs indexTimeHits = indexTimeCollector.topDocs();

    if (VERBOSE) {
      System.out.println(" topDocs query-time sort: totalHits=" + queryTimeHits.totalHits);
      for (ScoreDoc scoreDoc : queryTimeHits.scoreDocs) {
        System.out.println(" " + scoreDoc.doc);
      }
      System.out.println(" topDocs index-time sort: totalHits=" + indexTimeHits.totalHits);
      for (ScoreDoc scoreDoc : indexTimeHits.scoreDocs) {
        System.out.println(" " + scoreDoc.doc);
      }
    }

    // The index-sorted search may report fewer total hits, but must return the same top hits.
    assertTrue(indexTimeHits.totalHits <= queryTimeHits.totalHits);
    assertEquals(indexTimeHits.scoreDocs.length, queryTimeHits.scoreDocs.length);
    for (int rank = 0; rank < indexTimeHits.scoreDocs.length; rank++) {
      ScoreDoc queryTimeHit = queryTimeHits.scoreDocs[rank];
      ScoreDoc indexTimeHit = indexTimeHits.scoreDocs[rank];
      assertEquals(
          plainReader.document(queryTimeHit.doc).get("id"),
          sortedReader.document(indexTimeHit.doc).get("id"));
      assertEquals(((FieldDoc) queryTimeHit).fields, ((FieldDoc) indexTimeHit).fields);
    }
  }

  IOUtils.close(plainReader, sortedReader, plainWriter, sortedWriter, plainDir, sortedDir);
}
Use of org.apache.lucene.document.DoubleDocValuesField in the lucene-solr project by Apache.
Class TestSort, method testDoubleMissingLast.
/**
 * Tests sorting on type double when the missing value is set to {@link Double#MAX_VALUE}:
 * the one document without a "value" field must come last, after the three documents
 * that have a value, which are returned in ascending order.
 */
public void testDoubleMissingLast() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

  // First document has no "value" at all.
  indexWriter.addDocument(new Document());

  // Remaining documents: a double doc value plus the same number as a stored string.
  final double[] numericValues = {-1.3, 4.2333333333333, 4.2333333333332};
  final String[] storedValues = {"-1.3", "4.2333333333333", "4.2333333333332"};
  for (int i = 0; i < numericValues.length; i++) {
    Document valued = new Document();
    valued.add(new DoubleDocValuesField("value", numericValues[i]));
    valued.add(newStringField("value", storedValues[i], Field.Store.YES));
    indexWriter.addDocument(valued);
  }

  IndexReader indexReader = indexWriter.getReader();
  indexWriter.close();
  IndexSearcher indexSearcher = newSearcher(indexReader);

  SortField byValue = new SortField("value", SortField.Type.DOUBLE);
  byValue.setMissingValue(Double.MAX_VALUE);
  TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), 10, new Sort(byValue));
  assertEquals(4, topDocs.totalHits);

  // null treated as Double.MAX_VALUE
  final String[] expectedOrder = {"-1.3", "4.2333333333332", "4.2333333333333"};
  for (int rank = 0; rank < expectedOrder.length; rank++) {
    assertEquals(expectedOrder[rank], indexSearcher.doc(topDocs.scoreDocs[rank].doc).get("value"));
  }
  assertNull(indexSearcher.doc(topDocs.scoreDocs[3].doc).get("value"));

  indexReader.close();
  directory.close();
}
Use of org.apache.lucene.document.DoubleDocValuesField in the lucene-solr project by Apache.
Class TestDocValuesStatsCollector, method testDocsWithDoubleValues.
/**
 * Checks {@link DoubleDocValuesStats} against statistics computed directly from the
 * indexed values.
 *
 * <p>Between 1 and 100 documents are indexed; each one randomly either gets a double doc
 * value (1.0, 2.0, ... in indexing order) together with an "id" term, or is left empty.
 * In roughly 20% of runs a random subset of documents is then deleted by "id". The
 * {@code docValues} array mirrors the expected state: an entry of 0 means the document
 * has no value or was deleted, which is unambiguous because real values start at 1.0.
 *
 * @throws IOException if indexing or searching fails
 */
public void testDocsWithDoubleValues() throws IOException {
  try (Directory dir = newDirectory();
      IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
    String field = "numeric";
    int numDocs = TestUtil.nextInt(random(), 1, 100);
    double[] docValues = new double[numDocs];
    double nextVal = 1.0;
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      if (random().nextBoolean()) {
        // not all documents have a value
        doc.add(new DoubleDocValuesField(field, nextVal));
        doc.add(new StringField("id", "doc" + i, Store.NO));
        docValues[i] = nextVal;
        ++nextVal;
      }
      indexWriter.addDocument(doc);
    }

    // 20% of cases delete some docs
    if (random().nextDouble() < 0.2) {
      for (int i = 0; i < numDocs; i++) {
        if (random().nextBoolean()) {
          // Only documents that were given a value carry an "id" term, so this is a
          // no-op for the others (whose docValues entry is already 0).
          indexWriter.deleteDocuments(new Term("id", "doc" + i));
          docValues[i] = 0;
        }
      }
    }

    try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      DoubleDocValuesStats stats = new DoubleDocValuesStats(field);
      searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));

      int expCount = (int) Arrays.stream(docValues).filter(v -> v > 0).count();
      assertEquals(expCount, stats.count());
      int numDocsWithoutField = (int) getZeroValues(docValues).count();
      assertEquals(computeExpMissing(numDocsWithoutField, numDocs, reader), stats.missing());

      // min/max/mean/sum/variance are only compared when at least one value was collected.
      if (stats.count() > 0) {
        DoubleSummaryStatistics sumStats = getPositiveValues(docValues).summaryStatistics();
        assertEquals(sumStats.getMax(), stats.max().doubleValue(), 0.00001);
        assertEquals(sumStats.getMin(), stats.min().doubleValue(), 0.00001);
        assertEquals(sumStats.getAverage(), stats.mean(), 0.00001);
        assertEquals(sumStats.getSum(), stats.sum(), 0.00001);
        // Read the mean through its accessor, like every other statistic in this test,
        // rather than reaching into the stats object's field.
        double variance = computeVariance(docValues, stats.mean(), stats.count());
        assertEquals(variance, stats.variance(), 0.00001);
        assertEquals(Math.sqrt(variance), stats.stdev(), 0.00001);
      }
    }
  }
}
Aggregations