Search in sources:

Example 56 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project stargate-core by tuplejump.

the class IndexEntryCollector method getIndexEntry.

/**
 * Assembles the {@link IndexEntry} describing a single document.
 *
 * <p>Reads the primary-key name, primary key and row key for {@code doc}, then takes a
 * snapshot of every registered numeric and string doc-values field for that document.
 *
 * @param slot  slot value passed through unchanged to the resulting entry
 * @param doc   document id used for all doc-values lookups; it is offset by {@code docBase}
 *              when stored in the entry
 * @param score score passed through unchanged to the resulting entry
 * @return a new entry carrying the keys, the offset document id and the per-field values
 * @throws IOException declared for the underlying doc-values lookups
 */
IndexEntry getIndexEntry(int slot, int doc, float score) throws IOException {
    final String primaryKeyName = LuceneUtils.primaryKeyName(pkNames, doc);
    final ByteBuffer primaryKeyBytes = LuceneUtils.byteBufferDocValue(primaryKeys, doc);
    final ByteBuffer rowKeyBytes = LuceneUtils.byteBufferDocValue(rowKeys, doc);

    final Map<String, Number> numbersByField = new HashMap<>();
    final Map<String, String> stringsByField = new HashMap<>();

    // Numeric fields: the Lucene type is resolved per field before the value is read.
    for (Map.Entry<String, NumericDocValues> numericField : numericDocValuesMap.entrySet()) {
        final String fieldName = numericField.getKey();
        final Type luceneType = AggregateFunction.getLuceneType(options, fieldName);
        final Number value = LuceneUtils.numericDocValue(numericField.getValue(), doc, luceneType);
        numbersByField.put(fieldName, value);
    }

    // String fields are read from their sorted doc values.
    for (Map.Entry<String, SortedDocValues> stringField : stringDocValues.entrySet()) {
        final String text = LuceneUtils.stringDocValue(stringField.getValue(), doc);
        stringsByField.put(stringField.getKey(), text);
    }

    final int offsetDocId = docBase + doc;
    return new IndexEntry(rowKeyBytes, primaryKeyName, primaryKeyBytes, slot, offsetDocId, score, numbersByField, stringsByField);
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) ByteBuffer(java.nio.ByteBuffer) SortedDocValues(org.apache.lucene.index.SortedDocValues) FieldType(org.apache.lucene.document.FieldType)

Example 57 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestFieldCache method test.

/**
 * Walks through the accessors of {@code FieldCache.DEFAULT} against {@code reader} and checks
 * the values they expose: numerics, docs-with-field bits, sorted terms, binary terms and
 * multi-valued term ordinals.
 *
 * <p>{@code reader}, {@code NUM_DOCS}, {@code unicodeStrings} and {@code multiValued} are
 * defined outside this method (presumably fixtures built in the test's setup; confirm there).
 * The doc-values objects used below are forward-only iterators, so the order of the
 * {@code nextDoc()} / {@code advance()} calls matters.
 *
 * @throws IOException declared for the cache and doc-values calls
 */
public void test() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    // Numeric fields: every document 0..NUM_DOCS-1 must be visited in order, and the value is
    // compared against (MAX_VALUE - i), using the raw bit pattern for doubles and floats.
    NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, doubles.nextDoc());
        assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
    }
    NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, longs.nextDoc());
        assertEquals(Long.MAX_VALUE - i, longs.longValue());
    }
    NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, ints.nextDoc());
        assertEquals(Integer.MAX_VALUE - i, ints.longValue());
    }
    NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, floats.nextDoc());
        assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
    }
    // getDocsWithField on "theLong": a repeated request must return the same instance, the
    // result must be a Bits.MatchAllBits of length NUM_DOCS, and every bit must be set.
    Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
    assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertTrue(docsWithField.get(i));
    }
    // getDocsWithField on "sparse": same-instance check again, but the result must NOT be
    // MatchAllBits, and a bit is expected to be set exactly for even document ids.
    docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
    assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertEquals(i % 2 == 0, docsWithField.get(i));
    }
    // getTermsIndex
    SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        final String s;
        // Only move the iterator forward when it is still behind document i.
        if (i > termsIndex.docID()) {
            termsIndex.advance(i);
        }
        // If the iterator did not land on i, s stays null for this document.
        if (i == termsIndex.docID()) {
            s = termsIndex.binaryValue().utf8ToString();
        } else {
            s = null;
        }
        // A null expected string is accepted without comparing it to s.
        assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
    }
    // Enumerating the terms in order must agree with looking each ordinal up directly.
    int nTerms = termsIndex.getValueCount();
    TermsEnum tenum = termsIndex.termsEnum();
    for (int i = 0; i < nTerms; i++) {
        // Deep copy of the enum's term before the lookupOrd call below.
        BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
        final BytesRef val = termsIndex.lookupOrd(i);
        // System.out.println("i="+i);
        assertEquals(val, val1);
    }
    // seek the enum around (note this isn't a great test here)
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
        // Random ordinal: seeking to its term must report FOUND and position the enum on it.
        int k = random().nextInt(nTerms);
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }
    // Same seek check, this time for every ordinal in ascending order.
    for (int i = 0; i < nTerms; i++) {
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }
    // test bad field
    // (the result is not inspected; this only checks that the call completes)
    termsIndex = cache.getTermsIndex(reader, "bogusfield");
    // getTerms
    BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        // Step the iterator at most once per document when it is behind i.
        if (terms.docID() < i) {
            terms.nextDoc();
        }
        // Documents the iterator does not land on must have a null expected string.
        if (terms.docID() == i) {
            assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
        } else {
            assertNull(unicodeStrings[i]);
        }
    }
    // test bad field
    // (the result is not inspected; this only checks that the call completes)
    terms = cache.getTerms(reader, "bogusfield");
    // getDocTermOrds
    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    int numEntries = cache.getCacheEntries().length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    assertEquals(numEntries, cache.getCacheEntries().length);
    for (int i = 0; i < NUM_DOCS; i++) {
        // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
        for (BytesRef v : values) {
            if (v == null) {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            // Move to document i on its first non-null value; it must be the next doc returned.
            if (i > termOrds.docID()) {
                assertEquals(i, termOrds.nextDoc());
            }
            long ord = termOrds.nextOrd();
            // Plain Java assert: only evaluated when the JVM runs with assertions enabled.
            assert ord != SortedSetDocValues.NO_MORE_ORDS;
            BytesRef scratch = termOrds.lookupOrd(ord);
            assertEquals(v, scratch);
        }
        // Once all expected values are consumed, the document must have no further ordinals.
        if (i == termOrds.docID()) {
            assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
        }
    }
    // test bad field
    termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
    assertTrue(termOrds.getValueCount() == 0);
    // Purge the entries cached under this reader's core cache key.
    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheHelper().getKey());
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) ArrayList(java.util.ArrayList) LongPoint(org.apache.lucene.document.LongPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) SortedDocValues(org.apache.lucene.index.SortedDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) TermsEnum(org.apache.lucene.index.TermsEnum) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Bits(org.apache.lucene.util.Bits) BytesRef(org.apache.lucene.util.BytesRef)

Example 58 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestFieldCache method testNonexistantFields.

/**
 * Indexes a single empty document and asks the field cache for fields that do not exist.
 *
 * <p>Every doc-values accessor must return an iterator whose first {@code nextDoc()} is
 * {@code NO_MORE_DOCS}, the docs-with-field bits must be unset for document 0, and none of
 * the lookups may leave an entry in the cache.
 */
public void testNonexistantFields() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    // The only document in the index carries no fields at all.
    writer.addDocument(new Document());
    DirectoryReader topReader = writer.getReader();
    writer.close();
    LeafReader leaf = getOnlyLeafReader(topReader);

    final FieldCache fieldCache = FieldCache.DEFAULT;
    // Start from an empty cache so the final "cached nothing" check is meaningful.
    fieldCache.purgeAllCaches();
    assertEquals(0, fieldCache.getCacheEntries().length);

    // Numeric accessors, one per point parser.
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusints", FieldCache.INT_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "boguslongs", FieldCache.LONG_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusfloats", FieldCache.FLOAT_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER).nextDoc());

    // Term-based accessors.
    assertEquals(NO_MORE_DOCS, fieldCache.getTerms(leaf, "bogusterms").nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getTermsIndex(leaf, "bogustermsindex").nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getDocTermOrds(leaf, "bogusmultivalued", null).nextDoc());

    Bits fieldPresence = fieldCache.getDocsWithField(leaf, "bogusbits", null);
    assertFalse(fieldPresence.get(0));

    // check that we cached nothing
    assertEquals(0, fieldCache.getCacheEntries().length);
    topReader.close();
    directory.close();
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) Directory(org.apache.lucene.store.Directory)

Example 59 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestFieldCache method testNonIndexedFields.

/**
 * Indexes one document whose fields are all added as {@code StoredField}s and asks the
 * field cache for them.
 *
 * <p>Every doc-values accessor must return an iterator whose first {@code nextDoc()} is
 * {@code NO_MORE_DOCS}, the docs-with-field bits must be unset for document 0, and none of
 * the lookups may leave an entry in the cache.
 */
public void testNonIndexedFields() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    Document storedOnly = new Document();
    // Same field names, in the same order, each holding the stored string "bogus".
    final String[] storedFieldNames = {
        "bogusbytes", "bogusshorts", "bogusints", "boguslongs", "bogusfloats",
        "bogusdoubles", "bogusterms", "bogustermsindex", "bogusmultivalued", "bogusbits"
    };
    for (String fieldName : storedFieldNames) {
        storedOnly.add(new StoredField(fieldName, "bogus"));
    }
    writer.addDocument(storedOnly);
    DirectoryReader topReader = writer.getReader();
    writer.close();
    LeafReader leaf = getOnlyLeafReader(topReader);

    final FieldCache fieldCache = FieldCache.DEFAULT;
    // Start from an empty cache so the final "cached nothing" check is meaningful.
    fieldCache.purgeAllCaches();
    assertEquals(0, fieldCache.getCacheEntries().length);

    // Numeric accessors, one per point parser.
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusints", FieldCache.INT_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "boguslongs", FieldCache.LONG_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusfloats", FieldCache.FLOAT_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER).nextDoc());

    // Term-based accessors.
    assertEquals(NO_MORE_DOCS, fieldCache.getTerms(leaf, "bogusterms").nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getTermsIndex(leaf, "bogustermsindex").nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getDocTermOrds(leaf, "bogusmultivalued", null).nextDoc());

    Bits fieldPresence = fieldCache.getDocsWithField(leaf, "bogusbits", null);
    assertFalse(fieldPresence.get(0));

    // check that we cached nothing
    assertEquals(0, fieldCache.getCacheEntries().length);
    topReader.close();
    directory.close();
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) StoredField(org.apache.lucene.document.StoredField) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) Directory(org.apache.lucene.store.Directory)

Example 60 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestLucene70DocValuesFormat method doTestSparseNumericBlocksOfVariousBitsPerValue.

/**
 * Writes a sparse numeric doc-values field and verifies it against a stored copy.
 *
 * <p>For each document a random draw against {@code density} decides whether it gets a value.
 * Documents with a value carry it both as the numeric doc value {@code "dv"} and as the
 * stored string {@code "stored"}; the others are indexed empty. After a force-merge to one
 * segment, the doc-values iterator is checked document by document against the stored copy.
 *
 * @param density threshold for the per-document random draw; a document is left empty when
 *                {@code random().nextDouble() > density}
 */
private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    config.setMaxBufferedDocs(atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE));
    config.setRAMBufferSizeMB(-1);
    config.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
    IndexWriter indexWriter = new IndexWriter(directory, config);

    // One reusable document; its two fields are updated in place for each populated doc.
    Document populated = new Document();
    Field storedCopy = newStringField("stored", "", Field.Store.YES);
    Field docValueField = new NumericDocValuesField("dv", 0);
    populated.add(storedCopy);
    populated.add(docValueField);

    final int docCount = atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
    final LongSupplier valueSource = blocksOfVariousBPV();
    for (int n = 0; n < docCount; n++) {
        if (random().nextDouble() > density) {
            // Gap: index an empty document so this doc id has no value.
            indexWriter.addDocument(new Document());
        } else {
            long next = valueSource.getAsLong();
            storedCopy.setStringValue(Long.toString(next));
            docValueField.setLongValue(next);
            indexWriter.addDocument(populated);
        }
    }
    indexWriter.forceMerge(1);
    indexWriter.close();

    // compare
    DirectoryReader topReader = DirectoryReader.open(directory);
    TestUtil.checkReader(topReader);
    for (LeafReaderContext leafContext : topReader.leaves()) {
        LeafReader leaf = leafContext.reader();
        NumericDocValues numeric = DocValues.getNumeric(leaf, "dv");
        // Position the iterator on the first document that has a value.
        numeric.nextDoc();
        for (int docId = 0; docId < leaf.maxDoc(); docId++) {
            String expected = leaf.document(docId).get("stored");
            if (expected != null) {
                assertEquals(docId, numeric.docID());
                assertEquals(Long.parseLong(expected), numeric.longValue());
                numeric.nextDoc();
            } else {
                // No stored copy: the iterator must already be past this document.
                assertTrue(numeric.docID() > docId);
            }
        }
        // Every value was consumed, so the iterator must be exhausted.
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, numeric.docID());
    }
    topReader.close();
    directory.close();
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) LongSupplier(java.util.function.LongSupplier) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

NumericDocValues (org.apache.lucene.index.NumericDocValues): 81, Document (org.apache.lucene.document.Document): 30, Directory (org.apache.lucene.store.Directory): 29, LeafReader (org.apache.lucene.index.LeafReader): 25, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 25, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 23, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 22, SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues): 22, IOException (java.io.IOException): 20, BytesRef (org.apache.lucene.util.BytesRef): 19, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 17, HashSet (java.util.HashSet): 16, Bits (org.apache.lucene.util.Bits): 16, DirectoryReader (org.apache.lucene.index.DirectoryReader): 15, SortedDocValues (org.apache.lucene.index.SortedDocValues): 15, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 14, SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 13, BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField): 12, IndexReader (org.apache.lucene.index.IndexReader): 12, Term (org.apache.lucene.index.Term): 12