Use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
From the class TestPerFieldDocValuesFormat, method testTwoFieldsTwoFormats.
// just a simple trivial test
// TODO: we should come up with a test that somehow checks that segment suffix
// is respected by all codec apis (not just docvalues and postings)
public void testTwoFieldsTwoFormats() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = newDirectory();
  // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  final DocValuesFormat fast = TestUtil.getDefaultDocValuesFormat();
  final DocValuesFormat slow = DocValuesFormat.forName("Memory");
  iwc.setCodec(new AssertingCodec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      if ("dv1".equals(field)) {
        return fast;
      } else {
        return slow;
      }
    }
  });
  IndexWriter iwriter = new IndexWriter(directory, iwc);
  Document doc = new Document();
  String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
  String text = "This is the text to be indexed. " + longTerm;
  doc.add(newTextField("fieldname", text, Field.Store.YES));
  doc.add(new NumericDocValuesField("dv1", 5));
  doc.add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
  iwriter.addDocument(doc);
  iwriter.close();
  // Now search the index:
  // read-only=true
  IndexReader ireader = DirectoryReader.open(directory);
  IndexSearcher isearcher = newSearcher(ireader);
  assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
  Query query = new TermQuery(new Term("fieldname", "text"));
  TopDocs hits = isearcher.search(query, 1);
  assertEquals(1, hits.totalHits);
  // Iterate through the results:
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    int hitDocID = hits.scoreDocs[i].doc;
    Document hitDoc = isearcher.doc(hitDocID);
    assertEquals(text, hitDoc.get("fieldname"));
    assert ireader.leaves().size() == 1;
    NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
    assertEquals(hitDocID, dv.advance(hitDocID));
    assertEquals(5, dv.longValue());
    BinaryDocValues dv2 = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
    assertEquals(hitDocID, dv2.advance(hitDocID));
    final BytesRef term = dv2.binaryValue();
    assertEquals(new BytesRef("hello world"), term);
  }
  ireader.close();
  directory.close();
}
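The lookup inside the hit loop is the general pattern for reading per-document numeric doc values with this iterator-style API: advance() moves to the first document at or beyond the target, so the returned docID must be checked before calling longValue(). A minimal sketch of that pattern as a standalone helper (the method name, the null check, and the defaultValue parameter are assumptions of this sketch, not part of the test):

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;

// Sketch only: read the numeric doc value of one document, falling back to
// defaultValue when the field is missing for that document or for the segment.
static long readNumericDocValue(LeafReader leafReader, String field, int hitDocID, long defaultValue) throws IOException {
  NumericDocValues dv = leafReader.getNumericDocValues(field);
  if (dv != null && dv.advance(hitDocID) == hitDocID) {
    return dv.longValue();
  }
  return defaultValue;
}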
Use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
From the class TestMemoryIndexAgainstRAMDir, method duellReaders.
private void duellReaders(CompositeReader other, LeafReader memIndexReader) throws IOException {
  Fields memFields = memIndexReader.fields();
  for (String field : MultiFields.getFields(other)) {
    Terms memTerms = memFields.terms(field);
    Terms iwTerms = memIndexReader.terms(field);
    if (iwTerms == null) {
      assertNull(memTerms);
    } else {
      NumericDocValues normValues = MultiDocValues.getNormValues(other, field);
      NumericDocValues memNormValues = memIndexReader.getNormValues(field);
      if (normValues != null) {
        // mem idx always computes norms on the fly
        assertNotNull(memNormValues);
        assertEquals(0, normValues.nextDoc());
        assertEquals(0, memNormValues.nextDoc());
        assertEquals(normValues.longValue(), memNormValues.longValue());
      }
      assertNotNull(memTerms);
      assertEquals(iwTerms.getDocCount(), memTerms.getDocCount());
      assertEquals(iwTerms.getSumDocFreq(), memTerms.getSumDocFreq());
      assertEquals(iwTerms.getSumTotalTermFreq(), memTerms.getSumTotalTermFreq());
      TermsEnum iwTermsIter = iwTerms.iterator();
      TermsEnum memTermsIter = memTerms.iterator();
      if (iwTerms.hasPositions()) {
        final boolean offsets = iwTerms.hasOffsets() && memTerms.hasOffsets();
        while (iwTermsIter.next() != null) {
          assertNotNull(memTermsIter.next());
          assertEquals(iwTermsIter.term(), memTermsIter.term());
          PostingsEnum iwDocsAndPos = iwTermsIter.postings(null, PostingsEnum.ALL);
          PostingsEnum memDocsAndPos = memTermsIter.postings(null, PostingsEnum.ALL);
          while (iwDocsAndPos.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
            assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
            assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
            for (int i = 0; i < iwDocsAndPos.freq(); i++) {
              assertEquals("term: " + iwTermsIter.term().utf8ToString(), iwDocsAndPos.nextPosition(), memDocsAndPos.nextPosition());
              if (offsets) {
                assertEquals(iwDocsAndPos.startOffset(), memDocsAndPos.startOffset());
                assertEquals(iwDocsAndPos.endOffset(), memDocsAndPos.endOffset());
              }
              if (iwTerms.hasPayloads()) {
                assertEquals(iwDocsAndPos.getPayload(), memDocsAndPos.getPayload());
              }
            }
          }
        }
      } else {
        while (iwTermsIter.next() != null) {
          assertEquals(iwTermsIter.term(), memTermsIter.term());
          PostingsEnum iwDocsAndPos = iwTermsIter.postings(null);
          PostingsEnum memDocsAndPos = memTermsIter.postings(null);
          while (iwDocsAndPos.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
            assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
            assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
          }
        }
      }
    }
  }
}
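The lockstep comparison above exercises the full postings API: terms, documents, frequencies, positions, offsets and payloads, plus the norms read through NumericDocValues. A one-sided version of the same traversal, reduced to a debugging helper, might look like the following sketch; the reader and the field name are placeholders, and it assumes the field was indexed with positions, as in the hasPositions() branch:

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

// Sketch only: print every term, document and position of one field.
static void dumpPostings(LeafReader reader, String field) throws IOException {
  Terms terms = reader.terms(field);
  if (terms == null) {
    return; // field has no postings
  }
  TermsEnum termsEnum = terms.iterator();
  for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
    PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      for (int i = 0; i < postings.freq(); i++) {
        int position = postings.nextPosition(); // requires positions to be indexed
        System.out.println(term.utf8ToString() + " doc=" + postings.docID() + " pos=" + position);
      }
    }
  }
}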
Use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
From the class TestMemoryIndex, method testDocValues.
public void testDocValues() throws Exception {
  Document doc = new Document();
  doc.add(new NumericDocValuesField("numeric", 29L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 33L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 31L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 30L));
  doc.add(new BinaryDocValuesField("binary", new BytesRef("a")));
  doc.add(new SortedDocValuesField("sorted", new BytesRef("b")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("f")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("c")));
  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
  NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
  assertEquals(0, numericDocValues.nextDoc());
  assertEquals(29L, numericDocValues.longValue());
  SortedNumericDocValues sortedNumericDocValues = leafReader.getSortedNumericDocValues("sorted_numeric");
  assertEquals(0, sortedNumericDocValues.nextDoc());
  assertEquals(5, sortedNumericDocValues.docValueCount());
  assertEquals(30L, sortedNumericDocValues.nextValue());
  assertEquals(31L, sortedNumericDocValues.nextValue());
  assertEquals(32L, sortedNumericDocValues.nextValue());
  assertEquals(32L, sortedNumericDocValues.nextValue());
  assertEquals(33L, sortedNumericDocValues.nextValue());
  BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
  assertEquals(0, binaryDocValues.nextDoc());
  assertEquals("a", binaryDocValues.binaryValue().utf8ToString());
  SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
  assertEquals(0, sortedDocValues.nextDoc());
  assertEquals("b", sortedDocValues.binaryValue().utf8ToString());
  assertEquals(0, sortedDocValues.ordValue());
  assertEquals("b", sortedDocValues.lookupOrd(0).utf8ToString());
  SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
  assertEquals(3, sortedSetDocValues.getValueCount());
  assertEquals(0, sortedSetDocValues.nextDoc());
  assertEquals(0L, sortedSetDocValues.nextOrd());
  assertEquals(1L, sortedSetDocValues.nextOrd());
  assertEquals(2L, sortedSetDocValues.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());
  assertEquals("c", sortedSetDocValues.lookupOrd(0L).utf8ToString());
  assertEquals("d", sortedSetDocValues.lookupOrd(1L).utf8ToString());
  assertEquals("f", sortedSetDocValues.lookupOrd(2L).utf8ToString());
}
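The ordinal assertions above show that sorted-set doc values deduplicate and sort the per-document values (the four "sorted_set" fields collapse to "c", "d", "f"). The consumption loop they spell out ordinal by ordinal generalizes to the sketch below; the helper name, the reader, the field, and the docID are placeholders:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;

// Sketch only: collect the distinct, sorted values of a sorted-set field for one document.
static List<String> readSortedSetValues(LeafReader reader, String field, int docID) throws IOException {
  List<String> values = new ArrayList<>();
  SortedSetDocValues dv = reader.getSortedSetDocValues(field);
  if (dv != null && dv.advance(docID) == docID) {
    for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
      values.add(dv.lookupOrd(ord).utf8ToString());
    }
  }
  return values;
}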
Use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
From the class TestMemoryIndexAgainstRAMDir, method testNormsWithDocValues.
public void testNormsWithDocValues() throws Exception {
  MemoryIndex mi = new MemoryIndex(true, true);
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  mi.addField(new BinaryDocValuesField("text", new BytesRef("quick brown fox")), mockAnalyzer);
  mi.addField(new TextField("text", "quick brown fox", Field.Store.NO), mockAnalyzer);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
  Document doc = new Document();
  doc.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
  Field field = new TextField("text", "quick brown fox", Field.Store.NO);
  doc.add(field);
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.addDocument(doc);
  writer.close();
  IndexReader controlIndexReader = DirectoryReader.open(dir);
  LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();
  NumericDocValues norms = controlLeafReader.getNormValues("text");
  assertEquals(0, norms.nextDoc());
  NumericDocValues norms2 = leafReader.getNormValues("text");
  assertEquals(0, norms2.nextDoc());
  assertEquals(norms.longValue(), norms2.longValue());
  controlIndexReader.close();
  dir.close();
}
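The test indexes the same document into a MemoryIndex and into a regular Directory-backed index, then checks that both expose the same norm through getNormValues. A minimal standalone sketch of the MemoryIndex half, assuming the lucene-memory module is on the classpath and substituting StandardAnalyzer for the test's MockAnalyzer:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.memory.MemoryIndex;

// Sketch only: index one field in a MemoryIndex and read back its norm.
public class MemoryIndexNormsSketch {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer();
    MemoryIndex mi = new MemoryIndex(true, true); // store offsets and payloads
    mi.addField(new TextField("text", "quick brown fox", Field.Store.NO), analyzer);
    LeafReader reader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
    NumericDocValues norms = reader.getNormValues("text"); // MemoryIndex computes norms on the fly
    if (norms != null && norms.nextDoc() == 0) {            // the single document has docID 0
      System.out.println("norm for 'text': " + norms.longValue());
    }
  }
}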
Use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.
From the class BaseRangeFieldQueryTestCase, method verify.
private void verify(Range[][] ranges) throws Exception {
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  // Else we can get O(N^2) merging
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < ranges.length / 100) {
    iwc.setMaxBufferedDocs(ranges.length / 100);
  }
  Directory dir;
  if (ranges.length > 50000) {
    dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  } else {
    dir = newDirectory();
  }
  Set<Integer> deleted = new HashSet<>();
  IndexWriter w = new IndexWriter(dir, iwc);
  for (int id = 0; id < ranges.length; ++id) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + id, Field.Store.NO));
    doc.add(new NumericDocValuesField("id", id));
    if (ranges[id][0].isMissing == false) {
      for (int n = 0; n < ranges[id].length; ++n) {
        doc.add(newRangeField(ranges[id][n]));
      }
    }
    w.addDocument(doc);
    if (id > 0 && random().nextInt(100) == 1) {
      int idToDelete = random().nextInt(id);
      w.deleteDocuments(new Term("id", "" + idToDelete));
      deleted.add(idToDelete);
      if (VERBOSE) {
        System.out.println(" delete id=" + idToDelete);
      }
    }
  }
  if (random().nextBoolean()) {
    w.forceMerge(1);
  }
  final IndexReader r = DirectoryReader.open(w);
  w.close();
  IndexSearcher s = newSearcher(r);
  int dimensions = ranges[0][0].numDimensions();
  int iters = atLeast(25);
  Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
  int maxDoc = s.getIndexReader().maxDoc();
  for (int iter = 0; iter < iters; ++iter) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " s=" + s);
    }
    // occasionally test open ended bounding ranges
    Range queryRange = nextRange(dimensions);
    int rv = random().nextInt(4);
    Query query;
    Range.QueryType queryType;
    if (rv == 0) {
      queryType = Range.QueryType.INTERSECTS;
      query = newIntersectsQuery(queryRange);
    } else if (rv == 1) {
      queryType = Range.QueryType.CONTAINS;
      query = newContainsQuery(queryRange);
    } else if (rv == 2) {
      queryType = Range.QueryType.WITHIN;
      query = newWithinQuery(queryRange);
    } else {
      queryType = Range.QueryType.CROSSES;
      query = newCrossesQuery(queryRange);
    }
    if (VERBOSE) {
      System.out.println(" query=" + query);
    }
    final FixedBitSet hits = new FixedBitSet(maxDoc);
    s.search(query, new SimpleCollector() {
      private int docBase;
      @Override
      public void collect(int doc) {
        hits.set(docBase + doc);
      }
      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
      }
      @Override
      public boolean needsScores() {
        return false;
      }
    });
    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
    for (int docID = 0; docID < maxDoc; ++docID) {
      assertEquals(docID, docIDToID.nextDoc());
      int id = (int) docIDToID.longValue();
      boolean expected;
      if (liveDocs != null && liveDocs.get(docID) == false) {
        // document is deleted
        expected = false;
      } else if (ranges[id][0].isMissing) {
        expected = false;
      } else {
        expected = expectedResult(queryRange, ranges[id], queryType);
      }
      if (hits.get(docID) != expected) {
        StringBuilder b = new StringBuilder();
        b.append("FAIL (iter " + iter + "): ");
        if (expected == true) {
          b.append("id=" + id + (ranges[id].length > 1 ? " (MultiValue) " : " ") + "should match but did not\n");
        } else {
          b.append("id=" + id + " should not match but did\n");
        }
        b.append(" queryRange=" + queryRange + "\n");
        b.append(" box" + ((ranges[id].length > 1) ? "es=" : "=") + ranges[id][0]);
        for (int n = 1; n < ranges[id].length; ++n) {
          b.append(", ");
          b.append(ranges[id][n]);
        }
        b.append("\n queryType=" + queryType + "\n");
        b.append(" deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
        fail("wrong hit (first of possibly more):\n\n" + b);
      }
    }
  }
  IOUtils.close(r, dir);
}
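The verification loop relies on every document carrying its test id both as a keyword field (so it can be deleted by term) and as a NumericDocValuesField (so Lucene docIDs can be mapped back to ids after merges and deletes reorder the index). A sketch of that mapping step on its own, with the "id" field name assumed and a -1 sentinel introduced for documents without the field:

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;

// Sketch only: build a docID -> id table the same way verify() consumes it,
// assuming each live document stored its id in NumericDocValuesField("id", id).
static int[] mapDocIDsToIDs(IndexReader reader) throws IOException {
  int[] ids = new int[reader.maxDoc()];
  Arrays.fill(ids, -1); // -1 marks documents without the doc values field
  NumericDocValues docIDToID = MultiDocValues.getNumericValues(reader, "id");
  if (docIDToID != null) {
    for (int docID = docIDToID.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = docIDToID.nextDoc()) {
      ids[docID] = (int) docIDToID.longValue();
    }
  }
  return ids;
}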