Examples with SortedSetDocValues - org.apache.lucene.index.SortedSetDocValues

Example 31 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestDocTermOrds method testActuallySingleValued.

public void testActuallySingleValued() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwconfig = newIndexWriterConfig(null);
    iwconfig.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwconfig);
    Document doc = new Document();
    doc.add(new StringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);
    doc = new Document();
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();
    DirectoryReader ir = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(ir);
    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
    // actually a single-valued field
    assertNotNull(DocValues.unwrapSingleton(v));
    assertEquals(2, v.getValueCount());
    assertEquals(0, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    assertEquals(1, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    assertEquals(3, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    BytesRef value = v.lookupOrd(0);
    assertEquals("bar", value.utf8ToString());
    value = v.lookupOrd(1);
    assertEquals("baz", value.utf8ToString());
    ir.close();
    dir.close();
}

Also used : LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) StringField(org.apache.lucene.document.StringField) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 32 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestFieldCache method test.

public void test() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, doubles.nextDoc());
        assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
    }
    NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, longs.nextDoc());
        assertEquals(Long.MAX_VALUE - i, longs.longValue());
    }
    NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, ints.nextDoc());
        assertEquals(Integer.MAX_VALUE - i, ints.longValue());
    }
    NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, floats.nextDoc());
        assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
    }
    Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
    assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertTrue(docsWithField.get(i));
    }
    docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
    assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertEquals(i % 2 == 0, docsWithField.get(i));
    }
    // getTermsIndex
    SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        final String s;
        if (i > termsIndex.docID()) {
            termsIndex.advance(i);
        }
        if (i == termsIndex.docID()) {
            s = termsIndex.binaryValue().utf8ToString();
        } else {
            s = null;
        }
        assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
    }
    int nTerms = termsIndex.getValueCount();
    TermsEnum tenum = termsIndex.termsEnum();
    for (int i = 0; i < nTerms; i++) {
        BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
        final BytesRef val = termsIndex.lookupOrd(i);
        // System.out.println("i="+i);
        assertEquals(val, val1);
    }
    // seek the enum around (note this isn't a great test here)
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
        int k = random().nextInt(nTerms);
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }
    for (int i = 0; i < nTerms; i++) {
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }
    // test bad field
    termsIndex = cache.getTermsIndex(reader, "bogusfield");
    // getTerms
    BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        if (terms.docID() < i) {
            terms.nextDoc();
        }
        if (terms.docID() == i) {
            assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
        } else {
            assertNull(unicodeStrings[i]);
        }
    }
    // test bad field
    terms = cache.getTerms(reader, "bogusfield");
    // getDocTermOrds
    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    int numEntries = cache.getCacheEntries().length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    assertEquals(numEntries, cache.getCacheEntries().length);
    for (int i = 0; i < NUM_DOCS; i++) {
        // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
        for (BytesRef v : values) {
            if (v == null) {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            if (i > termOrds.docID()) {
                assertEquals(i, termOrds.nextDoc());
            }
            long ord = termOrds.nextOrd();
            assert ord != SortedSetDocValues.NO_MORE_ORDS;
            BytesRef scratch = termOrds.lookupOrd(ord);
            assertEquals(v, scratch);
        }
        if (i == termOrds.docID()) {
            assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
        }
    }
    // test bad field
    termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
    assertTrue(termOrds.getValueCount() == 0);
    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheHelper().getKey());
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) ArrayList(java.util.ArrayList) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) SortedDocValues(org.apache.lucene.index.SortedDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) TermsEnum(org.apache.lucene.index.TermsEnum) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Bits(org.apache.lucene.util.Bits) BytesRef(org.apache.lucene.util.BytesRef)

Example 33 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestFieldCache method testNonexistantFields.

public void testNonexistantFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);
    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);
    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, floats.nextDoc());
    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, doubles.nextDoc());
    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
    assertEquals(NO_MORE_DOCS, binaries.nextDoc());
    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(NO_MORE_DOCS, sorted.nextDoc());
    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
    assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());
    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
    assertFalse(bits.get(0));
    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) Directory(org.apache.lucene.store.Directory)

Example 34 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestFieldCache method testNonIndexedFields.

public void testNonIndexedFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new StoredField("bogusbytes", "bogus"));
    doc.add(new StoredField("bogusshorts", "bogus"));
    doc.add(new StoredField("bogusints", "bogus"));
    doc.add(new StoredField("boguslongs", "bogus"));
    doc.add(new StoredField("bogusfloats", "bogus"));
    doc.add(new StoredField("bogusdoubles", "bogus"));
    doc.add(new StoredField("bogusterms", "bogus"));
    doc.add(new StoredField("bogustermsindex", "bogus"));
    doc.add(new StoredField("bogusmultivalued", "bogus"));
    doc.add(new StoredField("bogusbits", "bogus"));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);
    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);
    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, floats.nextDoc());
    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, doubles.nextDoc());
    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
    assertEquals(NO_MORE_DOCS, binaries.nextDoc());
    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(NO_MORE_DOCS, sorted.nextDoc());
    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
    assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());
    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
    assertFalse(bits.get(0));
    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) StoredField(org.apache.lucene.document.StoredField) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) Directory(org.apache.lucene.store.Directory)

Example 35 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project jackrabbit-oak by apache.

the class FilteredSortedSetDocValuesFacetCounts method filterFacet.

private LabelAndValue[] filterFacet(int docId, String dimension, LabelAndValue[] labelAndValues) throws IOException {
    boolean filterd = false;
    Map<String, Long> newValues = new HashMap<String, Long>();
    Document document = reader.document(docId);
    SortedSetDocValues docValues = state.getDocValues();
    docValues.setDocument(docId);
    // filter using doc values (avoiding requiring stored values)
    if (!filter.isAccessible(document.getField(FieldNames.PATH).stringValue() + "/" + dimension)) {
        filterd = true;
        for (LabelAndValue lv : labelAndValues) {
            long existingCount = lv.value.longValue();
            BytesRef key = new BytesRef(FacetsConfig.pathToString(dimension, new String[] { lv.label }));
            long l = docValues.lookupTerm(key);
            if (l >= 0) {
                if (existingCount > 0) {
                    newValues.put(lv.label, existingCount - 1);
                } else {
                    if (newValues.containsKey(lv.label)) {
                        newValues.remove(lv.label);
                    }
                }
            }
        }
    }
    LabelAndValue[] filteredLVs;
    if (filterd) {
        filteredLVs = new LabelAndValue[newValues.size()];
        int i = 0;
        for (Map.Entry<String, Long> entry : newValues.entrySet()) {
            filteredLVs[i] = new LabelAndValue(entry.getKey(), entry.getValue());
            i++;
        }
    } else {
        filteredLVs = labelAndValues;
    }
    return filteredLVs;
}

Also used : HashMap(java.util.HashMap) Document(org.apache.lucene.document.Document) LabelAndValue(org.apache.lucene.facet.LabelAndValue) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) HashMap(java.util.HashMap) Map(java.util.Map) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)53 BytesRef (org.apache.lucene.util.BytesRef)33 Document (org.apache.lucene.document.Document)25 LeafReader (org.apache.lucene.index.LeafReader)24 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)22 SortedDocValues (org.apache.lucene.index.SortedDocValues)22 Directory (org.apache.lucene.store.Directory)22 DirectoryReader (org.apache.lucene.index.DirectoryReader)19 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)17 IndexWriter (org.apache.lucene.index.IndexWriter)12 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)10 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)9 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)9 NumericDocValues (org.apache.lucene.index.NumericDocValues)9 TermsEnum (org.apache.lucene.index.TermsEnum)9 IOException (java.io.IOException)8 ArrayList (java.util.ArrayList)8 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)8 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)7