Search in sources :

Example 61 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestPerFieldDocValuesFormat method testTwoFieldsTwoFormats.

// just a simple trivial test
// TODO: we should come up with a test that somehow checks that segment suffix
// is respected by all codec apis (not just docvalues and postings)
public void testTwoFieldsTwoFormats() throws IOException {
    Analyzer analyzer = new MockAnalyzer(random());
    Directory directory = newDirectory();
    // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    final DocValuesFormat fast = TestUtil.getDefaultDocValuesFormat();
    final DocValuesFormat slow = DocValuesFormat.forName("Memory");
    iwc.setCodec(new AssertingCodec() {

        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            if ("dv1".equals(field)) {
                return fast;
            } else {
                return slow;
            }
        }
    });
    IndexWriter iwriter = new IndexWriter(directory, iwc);
    Document doc = new Document();
    String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
    String text = "This is the text to be indexed. " + longTerm;
    doc.add(newTextField("fieldname", text, Field.Store.YES));
    doc.add(new NumericDocValuesField("dv1", 5));
    doc.add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
    iwriter.addDocument(doc);
    iwriter.close();
    // Now search the index:
    // read-only=true
    IndexReader ireader = DirectoryReader.open(directory);
    IndexSearcher isearcher = newSearcher(ireader);
    assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
    Query query = new TermQuery(new Term("fieldname", "text"));
    TopDocs hits = isearcher.search(query, 1);
    assertEquals(1, hits.totalHits);
    // Iterate through the results:
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        int hitDocID = hits.scoreDocs[i].doc;
        Document hitDoc = isearcher.doc(hitDocID);
        assertEquals(text, hitDoc.get("fieldname"));
        assert ireader.leaves().size() == 1;
        NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
        assertEquals(hitDocID, dv.advance(hitDocID));
        assertEquals(5, dv.longValue());
        BinaryDocValues dv2 = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
        assertEquals(hitDocID, dv2.advance(hitDocID));
        final BytesRef term = dv2.binaryValue();
        assertEquals(new BytesRef("hello world"), term);
    }
    ireader.close();
    directory.close();
}
Also used : AssertingCodec(org.apache.lucene.codecs.asserting.AssertingCodec) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) NumericDocValues(org.apache.lucene.index.NumericDocValues) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) DocValuesFormat(org.apache.lucene.codecs.DocValuesFormat) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 62 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestMemoryIndexAgainstRAMDir method duellReaders.

private void duellReaders(CompositeReader other, LeafReader memIndexReader) throws IOException {
    Fields memFields = memIndexReader.fields();
    for (String field : MultiFields.getFields(other)) {
        Terms memTerms = memFields.terms(field);
        Terms iwTerms = memIndexReader.terms(field);
        if (iwTerms == null) {
            assertNull(memTerms);
        } else {
            NumericDocValues normValues = MultiDocValues.getNormValues(other, field);
            NumericDocValues memNormValues = memIndexReader.getNormValues(field);
            if (normValues != null) {
                // mem idx always computes norms on the fly
                assertNotNull(memNormValues);
                assertEquals(0, normValues.nextDoc());
                assertEquals(0, memNormValues.nextDoc());
                assertEquals(normValues.longValue(), memNormValues.longValue());
            }
            assertNotNull(memTerms);
            assertEquals(iwTerms.getDocCount(), memTerms.getDocCount());
            assertEquals(iwTerms.getSumDocFreq(), memTerms.getSumDocFreq());
            assertEquals(iwTerms.getSumTotalTermFreq(), memTerms.getSumTotalTermFreq());
            TermsEnum iwTermsIter = iwTerms.iterator();
            TermsEnum memTermsIter = memTerms.iterator();
            if (iwTerms.hasPositions()) {
                final boolean offsets = iwTerms.hasOffsets() && memTerms.hasOffsets();
                while (iwTermsIter.next() != null) {
                    assertNotNull(memTermsIter.next());
                    assertEquals(iwTermsIter.term(), memTermsIter.term());
                    PostingsEnum iwDocsAndPos = iwTermsIter.postings(null, PostingsEnum.ALL);
                    PostingsEnum memDocsAndPos = memTermsIter.postings(null, PostingsEnum.ALL);
                    while (iwDocsAndPos.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                        assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
                        assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
                        for (int i = 0; i < iwDocsAndPos.freq(); i++) {
                            assertEquals("term: " + iwTermsIter.term().utf8ToString(), iwDocsAndPos.nextPosition(), memDocsAndPos.nextPosition());
                            if (offsets) {
                                assertEquals(iwDocsAndPos.startOffset(), memDocsAndPos.startOffset());
                                assertEquals(iwDocsAndPos.endOffset(), memDocsAndPos.endOffset());
                            }
                            if (iwTerms.hasPayloads()) {
                                assertEquals(iwDocsAndPos.getPayload(), memDocsAndPos.getPayload());
                            }
                        }
                    }
                }
            } else {
                while (iwTermsIter.next() != null) {
                    assertEquals(iwTermsIter.term(), memTermsIter.term());
                    PostingsEnum iwDocsAndPos = iwTermsIter.postings(null);
                    PostingsEnum memDocsAndPos = memTermsIter.postings(null);
                    while (iwDocsAndPos.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                        assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
                        assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
                    }
                }
            }
        }
    }
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint)

Example 63 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestMemoryIndex method testDocValues.

public void testDocValues() throws Exception {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("numeric", 29L));
    doc.add(new SortedNumericDocValuesField("sorted_numeric", 33L));
    doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
    doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
    doc.add(new SortedNumericDocValuesField("sorted_numeric", 31L));
    doc.add(new SortedNumericDocValuesField("sorted_numeric", 30L));
    doc.add(new BinaryDocValuesField("binary", new BytesRef("a")));
    doc.add(new SortedDocValuesField("sorted", new BytesRef("b")));
    doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("f")));
    doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
    doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
    doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("c")));
    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
    LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
    NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
    assertEquals(0, numericDocValues.nextDoc());
    assertEquals(29L, numericDocValues.longValue());
    SortedNumericDocValues sortedNumericDocValues = leafReader.getSortedNumericDocValues("sorted_numeric");
    assertEquals(0, sortedNumericDocValues.nextDoc());
    assertEquals(5, sortedNumericDocValues.docValueCount());
    assertEquals(30L, sortedNumericDocValues.nextValue());
    assertEquals(31L, sortedNumericDocValues.nextValue());
    assertEquals(32L, sortedNumericDocValues.nextValue());
    assertEquals(32L, sortedNumericDocValues.nextValue());
    assertEquals(33L, sortedNumericDocValues.nextValue());
    BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
    assertEquals(0, binaryDocValues.nextDoc());
    assertEquals("a", binaryDocValues.binaryValue().utf8ToString());
    SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
    assertEquals(0, sortedDocValues.nextDoc());
    assertEquals("b", sortedDocValues.binaryValue().utf8ToString());
    assertEquals(0, sortedDocValues.ordValue());
    assertEquals("b", sortedDocValues.lookupOrd(0).utf8ToString());
    SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
    assertEquals(3, sortedSetDocValues.getValueCount());
    assertEquals(0, sortedSetDocValues.nextDoc());
    assertEquals(0L, sortedSetDocValues.nextOrd());
    assertEquals(1L, sortedSetDocValues.nextOrd());
    assertEquals(2L, sortedSetDocValues.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());
    assertEquals("c", sortedSetDocValues.lookupOrd(0L).utf8ToString());
    assertEquals("d", sortedSetDocValues.lookupOrd(1L).utf8ToString());
    assertEquals("f", sortedSetDocValues.lookupOrd(2L).utf8ToString());
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 64 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestMemoryIndexAgainstRAMDir method testNormsWithDocValues.

public void testNormsWithDocValues() throws Exception {
    MemoryIndex mi = new MemoryIndex(true, true);
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    mi.addField(new BinaryDocValuesField("text", new BytesRef("quick brown fox")), mockAnalyzer);
    mi.addField(new TextField("text", "quick brown fox", Field.Store.NO), mockAnalyzer);
    LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
    Document doc = new Document();
    doc.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
    Field field = new TextField("text", "quick brown fox", Field.Store.NO);
    doc.add(field);
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
    writer.addDocument(doc);
    writer.close();
    IndexReader controlIndexReader = DirectoryReader.open(dir);
    LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();
    NumericDocValues norms = controlLeafReader.getNormValues("text");
    assertEquals(0, norms.nextDoc());
    NumericDocValues norms2 = leafReader.getNormValues("text");
    assertEquals(0, norms2.nextDoc());
    assertEquals(norms.longValue(), norms2.longValue());
    controlIndexReader.close();
    dir.close();
}
Also used : SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) NumericDocValues(org.apache.lucene.index.NumericDocValues) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory)

Example 65 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class BaseRangeFieldQueryTestCase method verify.

private void verify(Range[][] ranges) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    // Else we can get O(N^2) merging
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < ranges.length / 100) {
        iwc.setMaxBufferedDocs(ranges.length / 100);
    }
    Directory dir;
    if (ranges.length > 50000) {
        dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
    } else {
        dir = newDirectory();
    }
    Set<Integer> deleted = new HashSet<>();
    IndexWriter w = new IndexWriter(dir, iwc);
    for (int id = 0; id < ranges.length; ++id) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + id, Field.Store.NO));
        doc.add(new NumericDocValuesField("id", id));
        if (ranges[id][0].isMissing == false) {
            for (int n = 0; n < ranges[id].length; ++n) {
                doc.add(newRangeField(ranges[id][n]));
            }
        }
        w.addDocument(doc);
        if (id > 0 && random().nextInt(100) == 1) {
            int idToDelete = random().nextInt(id);
            w.deleteDocuments(new Term("id", "" + idToDelete));
            deleted.add(idToDelete);
            if (VERBOSE) {
                System.out.println("  delete id=" + idToDelete);
            }
        }
    }
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    final IndexReader r = DirectoryReader.open(w);
    w.close();
    IndexSearcher s = newSearcher(r);
    int dimensions = ranges[0][0].numDimensions();
    int iters = atLeast(25);
    Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
    int maxDoc = s.getIndexReader().maxDoc();
    for (int iter = 0; iter < iters; ++iter) {
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " s=" + s);
        }
        // occasionally test open ended bounding ranges
        Range queryRange = nextRange(dimensions);
        int rv = random().nextInt(4);
        Query query;
        Range.QueryType queryType;
        if (rv == 0) {
            queryType = Range.QueryType.INTERSECTS;
            query = newIntersectsQuery(queryRange);
        } else if (rv == 1) {
            queryType = Range.QueryType.CONTAINS;
            query = newContainsQuery(queryRange);
        } else if (rv == 2) {
            queryType = Range.QueryType.WITHIN;
            query = newWithinQuery(queryRange);
        } else {
            queryType = Range.QueryType.CROSSES;
            query = newCrossesQuery(queryRange);
        }
        if (VERBOSE) {
            System.out.println("  query=" + query);
        }
        final FixedBitSet hits = new FixedBitSet(maxDoc);
        s.search(query, new SimpleCollector() {

            private int docBase;

            @Override
            public void collect(int doc) {
                hits.set(docBase + doc);
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                docBase = context.docBase;
            }

            @Override
            public boolean needsScores() {
                return false;
            }
        });
        NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
        for (int docID = 0; docID < maxDoc; ++docID) {
            assertEquals(docID, docIDToID.nextDoc());
            int id = (int) docIDToID.longValue();
            boolean expected;
            if (liveDocs != null && liveDocs.get(docID) == false) {
                // document is deleted
                expected = false;
            } else if (ranges[id][0].isMissing) {
                expected = false;
            } else {
                expected = expectedResult(queryRange, ranges[id], queryType);
            }
            if (hits.get(docID) != expected) {
                StringBuilder b = new StringBuilder();
                b.append("FAIL (iter " + iter + "): ");
                if (expected == true) {
                    b.append("id=" + id + (ranges[id].length > 1 ? " (MultiValue) " : " ") + "should match but did not\n");
                } else {
                    b.append("id=" + id + " should not match but did\n");
                }
                b.append(" queryRange=" + queryRange + "\n");
                b.append(" box" + ((ranges[id].length > 1) ? "es=" : "=") + ranges[id][0]);
                for (int n = 1; n < ranges[id].length; ++n) {
                    b.append(", ");
                    b.append(ranges[id][n]);
                }
                b.append("\n queryType=" + queryType + "\n");
                b.append(" deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
                fail("wrong hit (first of possibly more):\n\n" + b);
            }
        }
    }
    IOUtils.close(r, dir);
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Bits(org.apache.lucene.util.Bits) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

NumericDocValues (org.apache.lucene.index.NumericDocValues)81 Document (org.apache.lucene.document.Document)30 Directory (org.apache.lucene.store.Directory)29 LeafReader (org.apache.lucene.index.LeafReader)25 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)25 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)23 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)22 SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues)22 IOException (java.io.IOException)20 BytesRef (org.apache.lucene.util.BytesRef)19 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)17 HashSet (java.util.HashSet)16 Bits (org.apache.lucene.util.Bits)16 DirectoryReader (org.apache.lucene.index.DirectoryReader)15 SortedDocValues (org.apache.lucene.index.SortedDocValues)15 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)14 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)13 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)12 IndexReader (org.apache.lucene.index.IndexReader)12 Term (org.apache.lucene.index.Term)12