Search in sources :

Example 11 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestUninvertingReader method testSortedSetIntegerManyValues.

/** Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps */
public void testSortedSetIntegerManyValues() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    final LegacyFieldType NO_TRIE_TYPE = new LegacyFieldType(LegacyIntField.TYPE_NOT_STORED);
    NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);
    final Map<String, Type> UNINVERT_MAP = new LinkedHashMap<String, Type>();
    UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
    final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
    MULTI_VALUES.add("trie_multi");
    MULTI_VALUES.add("notrie_multi");
    final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
    final int MIN = TestUtil.nextInt(random(), 10, 100);
    final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
    final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;
    {
        // (at least) one doc should have every value, so that at least one segment has every value
        final Document doc = new Document();
        for (int i = MIN; i <= MAX; i++) {
            doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
            doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
        }
        iw.addDocument(doc);
    }
    // now add some more random docs (note: starting at i=1 because of previously added doc)
    for (int i = 1; i < NUM_DOCS; i++) {
        final Document doc = new Document();
        if (0 != TestUtil.nextInt(random(), 0, 9)) {
            int val = TestUtil.nextInt(random(), MIN, MAX);
            doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
            doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
        }
        if (0 != TestUtil.nextInt(random(), 0, 9)) {
            int numMulti = atLeast(1);
            while (0 < numMulti--) {
                int val = TestUtil.nextInt(random(), MIN, MAX);
                doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
                doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
            }
        }
        iw.addDocument(doc);
    }
    iw.close();
    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);
    final int NUM_LEAVES = ir.leaves().size();
    // check the leaves: no more then total set size
    for (LeafReaderContext rc : ir.leaves()) {
        final LeafReader ar = rc.reader();
        for (String f : UNINVERT_MAP.keySet()) {
            final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
            final long valSetSize = v.getValueCount();
            assertTrue(f + ": Expected no more then " + EXPECTED_VALSET_SIZE + " values per segment, got " + valSetSize + " from: " + ar.toString(), valSetSize <= EXPECTED_VALSET_SIZE);
            if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
                // tighter check on multi fields in single segment index since we know one doc has all of them
                assertEquals(f + ": Single segment LeafReader's value set should have had exactly expected size", EXPECTED_VALSET_SIZE, valSetSize);
            }
        }
    }
    // check the composite of all leaves: exact expectation of set size
    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);
    for (String f : MULTI_VALUES) {
        final SortedSetDocValues v = composite.getSortedSetDocValues(f);
        final long valSetSize = v.getValueCount();
        assertEquals(f + ": Composite reader value set should have had exactly expected size", EXPECTED_VALSET_SIZE, valSetSize);
    }
    ir.close();
    dir.close();
}
Also used : LinkedHashSet(java.util.LinkedHashSet) LeafReader(org.apache.lucene.index.LeafReader) LegacyFieldType(org.apache.solr.legacy.LegacyFieldType) DirectoryReader(org.apache.lucene.index.DirectoryReader) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) LinkedHashMap(java.util.LinkedHashMap) Type(org.apache.solr.uninverting.UninvertingReader.Type) LegacyFieldType(org.apache.solr.legacy.LegacyFieldType) DocValuesType(org.apache.lucene.index.DocValuesType) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexWriter(org.apache.lucene.index.IndexWriter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Directory(org.apache.lucene.store.Directory) LegacyIntField(org.apache.solr.legacy.LegacyIntField)

Example 12 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestDocTermOrds method verify.

private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception {
    final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "field", prefixRef, Integer.MAX_VALUE, TestUtil.nextInt(random(), 2, 10));
    final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER);
    if (VERBOSE) {
        System.out.println("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
        System.out.println("TEST: all TERMS:");
        TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
        int ord = 0;
        while (allTE.next() != null) {
            System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
        }
    }
    //final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
        if (prefixRef == null) {
            assertNull(MultiFields.getTerms(r, "field"));
        } else {
            Terms terms = MultiFields.getTerms(r, "field");
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator();
                TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
                if (result != TermsEnum.SeekStatus.END) {
                    assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef));
                } else {
                // ok
                }
            } else {
            // ok
            }
        }
        return;
    }
    if (VERBOSE) {
        System.out.println("TEST: TERMS:");
        te.seekExact(0);
        while (true) {
            System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
            if (te.next() == null) {
                break;
            }
        }
    }
    SortedSetDocValues iter = dto.iterator(r);
    for (int docID = 0; docID < r.maxDoc(); docID++) {
        assertEquals(docID, docIDToID.nextDoc());
        if (docID > iter.docID()) {
            iter.nextDoc();
        }
        if (docID < iter.docID()) {
            int[] answers = idToOrds[(int) docIDToID.longValue()];
            assertEquals(0, answers.length);
            continue;
        }
        if (VERBOSE) {
            System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.longValue() + ")");
        }
        final int[] answers = idToOrds[(int) docIDToID.longValue()];
        int upto = 0;
        long ord;
        while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            te.seekExact(ord);
            final BytesRef expected = termsArray[answers[upto++]];
            if (VERBOSE) {
                System.out.println("  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
            }
            assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term());
        }
        assertEquals(answers.length, upto);
    }
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) SeekStatus(org.apache.lucene.index.TermsEnum.SeekStatus) Terms(org.apache.lucene.index.Terms) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 13 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestDocTermOrds method testNumericEncoded32.

public void testNumericEncoded32() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    Document doc = new Document();
    doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
    doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();
    DirectoryReader ir = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(ir);
    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
    assertEquals(2, v.getValueCount());
    assertEquals(0, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    assertEquals(1, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    BytesRef value = v.lookupOrd(0);
    assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
    value = v.lookupOrd(1);
    assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
    ir.close();
    dir.close();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) LegacyIntField(org.apache.solr.legacy.LegacyIntField)

Example 14 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestDocTermOrds method testBackToTheFuture.

public void testBackToTheFuture() throws Exception {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    Document doc = new Document();
    doc.add(newStringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("foo", "baz", Field.Store.NO));
    // we need a second value for a doc, or we don't actually test DocTermOrds!
    doc.add(newStringField("foo", "car", Field.Store.NO));
    iw.addDocument(doc);
    DirectoryReader r1 = DirectoryReader.open(iw);
    iw.deleteDocuments(new Term("foo", "baz"));
    DirectoryReader r2 = DirectoryReader.open(iw);
    FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r2), "foo", null);
    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r1), "foo", null);
    assertEquals(3, v.getValueCount());
    assertEquals(1, v.advance(1));
    assertEquals(1, v.nextOrd());
    iw.close();
    r1.close();
    r2.close();
    dir.close();
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Example 15 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestDocTermOrds method testNumericEncoded64.

public void testNumericEncoded64() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    Document doc = new Document();
    doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
    doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();
    DirectoryReader ir = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(ir);
    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
    assertEquals(2, v.getValueCount());
    assertEquals(0, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    assertEquals(1, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    BytesRef value = v.lookupOrd(0);
    assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
    value = v.lookupOrd(1);
    assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
    ir.close();
    dir.close();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) LegacyLongField(org.apache.solr.legacy.LegacyLongField) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)53 BytesRef (org.apache.lucene.util.BytesRef)33 Document (org.apache.lucene.document.Document)25 LeafReader (org.apache.lucene.index.LeafReader)24 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)22 SortedDocValues (org.apache.lucene.index.SortedDocValues)22 Directory (org.apache.lucene.store.Directory)22 DirectoryReader (org.apache.lucene.index.DirectoryReader)19 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)17 IndexWriter (org.apache.lucene.index.IndexWriter)12 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)10 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)9 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)9 NumericDocValues (org.apache.lucene.index.NumericDocValues)9 TermsEnum (org.apache.lucene.index.TermsEnum)9 IOException (java.io.IOException)8 ArrayList (java.util.ArrayList)8 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)8 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)7