Search in sources :

Example 46 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestLucene70DocValuesFormat method doTestSparseDocValuesVsStoredFields.

private void doTestSparseDocValuesVsStoredFields() throws Exception {
    final long[] values = new long[TestUtil.nextInt(random(), 1, 500)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = random().nextLong();
    }
    Directory dir = newFSDirectory(createTempDir());
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    conf.setMergeScheduler(new SerialMergeScheduler());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // sparse compression is only enabled if less than 1% of docs have a value
    final int avgGap = 100;
    final int numDocs = atLeast(200);
    for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
        writer.addDocument(new Document());
    }
    final int maxNumValuesPerDoc = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 5);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        // single-valued
        long docValue = values[random().nextInt(values.length)];
        doc.add(new NumericDocValuesField("numeric", docValue));
        doc.add(new SortedDocValuesField("sorted", new BytesRef(Long.toString(docValue))));
        doc.add(new BinaryDocValuesField("binary", new BytesRef(Long.toString(docValue))));
        doc.add(new StoredField("value", docValue));
        // multi-valued
        final int numValues = TestUtil.nextInt(random(), 1, maxNumValuesPerDoc);
        for (int j = 0; j < numValues; ++j) {
            docValue = values[random().nextInt(values.length)];
            doc.add(new SortedNumericDocValuesField("sorted_numeric", docValue));
            doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Long.toString(docValue))));
            doc.add(new StoredField("values", docValue));
        }
        writer.addDocument(doc);
        // add a gap
        for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
            writer.addDocument(new Document());
        }
    }
    if (random().nextBoolean()) {
        writer.forceMerge(1);
    }
    final IndexReader indexReader = writer.getReader();
    writer.close();
    for (LeafReaderContext context : indexReader.leaves()) {
        final LeafReader reader = context.reader();
        final NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
        final SortedDocValues sorted = DocValues.getSorted(reader, "sorted");
        final BinaryDocValues binary = DocValues.getBinary(reader, "binary");
        final SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(reader, "sorted_numeric");
        final SortedSetDocValues sortedSet = DocValues.getSortedSet(reader, "sorted_set");
        for (int i = 0; i < reader.maxDoc(); ++i) {
            final Document doc = reader.document(i);
            final IndexableField valueField = doc.getField("value");
            final Long value = valueField == null ? null : valueField.numericValue().longValue();
            if (value == null) {
                assertTrue(numeric.docID() + " vs " + i, numeric.docID() < i);
            } else {
                assertEquals(i, numeric.nextDoc());
                assertEquals(i, binary.nextDoc());
                assertEquals(i, sorted.nextDoc());
                assertEquals(value.longValue(), numeric.longValue());
                assertTrue(sorted.ordValue() >= 0);
                assertEquals(new BytesRef(Long.toString(value)), sorted.lookupOrd(sorted.ordValue()));
                assertEquals(new BytesRef(Long.toString(value)), binary.binaryValue());
            }
            final IndexableField[] valuesFields = doc.getFields("values");
            if (valuesFields.length == 0) {
                assertTrue(sortedNumeric.docID() + " vs " + i, sortedNumeric.docID() < i);
            } else {
                final Set<Long> valueSet = new HashSet<>();
                for (IndexableField sf : valuesFields) {
                    valueSet.add(sf.numericValue().longValue());
                }
                assertEquals(i, sortedNumeric.nextDoc());
                assertEquals(valuesFields.length, sortedNumeric.docValueCount());
                for (int j = 0; j < sortedNumeric.docValueCount(); ++j) {
                    assertTrue(valueSet.contains(sortedNumeric.nextValue()));
                }
                assertEquals(i, sortedSet.nextDoc());
                int sortedSetCount = 0;
                while (true) {
                    long ord = sortedSet.nextOrd();
                    if (ord == SortedSetDocValues.NO_MORE_ORDS) {
                        break;
                    }
                    assertTrue(valueSet.contains(Long.parseLong(sortedSet.lookupOrd(ord).utf8ToString())));
                    sortedSetCount++;
                }
                assertEquals(valueSet.size(), sortedSetCount);
            }
        }
    }
    indexReader.close();
    dir.close();
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Document(org.apache.lucene.document.Document) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) LeafReader(org.apache.lucene.index.LeafReader) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValues(org.apache.lucene.index.SortedDocValues) IndexableField(org.apache.lucene.index.IndexableField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexReader(org.apache.lucene.index.IndexReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 47 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class TestPointQueries method verifyLongs.

// verify for long values
private static void verifyLongs(long[] values, int[] ids) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else we can get O(N^2) merging:
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < values.length / 100) {
        iwc.setMaxBufferedDocs(values.length / 100);
    }
    iwc.setCodec(getCodec());
    Directory dir;
    if (values.length > 100000) {
        dir = newMaybeVirusCheckingFSDirectory(createTempDir("TestRangeTree"));
    } else {
        dir = newMaybeVirusCheckingDirectory();
    }
    int missingPct = random().nextInt(100);
    int deletedPct = random().nextInt(100);
    if (VERBOSE) {
        System.out.println("  missingPct=" + missingPct);
        System.out.println("  deletedPct=" + deletedPct);
    }
    BitSet missing = new BitSet();
    BitSet deleted = new BitSet();
    Document doc = null;
    int lastID = -1;
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    for (int ord = 0; ord < values.length; ord++) {
        int id;
        if (ids == null) {
            id = ord;
        } else {
            id = ids[ord];
        }
        if (id != lastID) {
            if (random().nextInt(100) < missingPct) {
                missing.set(id);
                if (VERBOSE) {
                    System.out.println("  missing id=" + id);
                }
            }
            if (doc != null) {
                w.addDocument(doc);
                if (random().nextInt(100) < deletedPct) {
                    int idToDelete = random().nextInt(id);
                    w.deleteDocuments(new Term("id", "" + idToDelete));
                    deleted.set(idToDelete);
                    if (VERBOSE) {
                        System.out.println("  delete id=" + idToDelete);
                    }
                }
            }
            doc = new Document();
            doc.add(newStringField("id", "" + id, Field.Store.NO));
            doc.add(new NumericDocValuesField("id", id));
            lastID = id;
        }
        if (missing.get(id) == false) {
            doc.add(new LongPoint("sn_value", values[id]));
            byte[] bytes = new byte[8];
            NumericUtils.longToSortableBytes(values[id], bytes, 0);
            doc.add(new BinaryPoint("ss_value", bytes));
        }
    }
    w.addDocument(doc);
    if (random().nextBoolean()) {
        if (VERBOSE) {
            System.out.println("  forceMerge(1)");
        }
        w.forceMerge(1);
    }
    final IndexReader r = w.getReader();
    w.close();
    IndexSearcher s = newSearcher(r, false);
    int numThreads = TestUtil.nextInt(random(), 2, 5);
    if (VERBOSE) {
        System.out.println("TEST: use " + numThreads + " query threads; searcher=" + s);
    }
    List<Thread> threads = new ArrayList<>();
    final int iters = atLeast(100);
    final CountDownLatch startingGun = new CountDownLatch(1);
    final AtomicBoolean failed = new AtomicBoolean();
    for (int i = 0; i < numThreads; i++) {
        Thread thread = new Thread() {

            @Override
            public void run() {
                try {
                    _run();
                } catch (Exception e) {
                    failed.set(true);
                    throw new RuntimeException(e);
                }
            }

            private void _run() throws Exception {
                startingGun.await();
                for (int iter = 0; iter < iters && failed.get() == false; iter++) {
                    Long lower = randomValue();
                    Long upper = randomValue();
                    if (upper < lower) {
                        long x = lower;
                        lower = upper;
                        upper = x;
                    }
                    Query query;
                    if (VERBOSE) {
                        System.out.println("\n" + Thread.currentThread().getName() + ": TEST: iter=" + iter + " value=" + lower + " TO " + upper);
                        byte[] tmp = new byte[8];
                        if (lower != null) {
                            NumericUtils.longToSortableBytes(lower, tmp, 0);
                            System.out.println("  lower bytes=" + Arrays.toString(tmp));
                        }
                        if (upper != null) {
                            NumericUtils.longToSortableBytes(upper, tmp, 0);
                            System.out.println("  upper bytes=" + Arrays.toString(tmp));
                        }
                    }
                    if (random().nextBoolean()) {
                        query = LongPoint.newRangeQuery("sn_value", lower, upper);
                    } else {
                        byte[] lowerBytes = new byte[8];
                        NumericUtils.longToSortableBytes(lower, lowerBytes, 0);
                        byte[] upperBytes = new byte[8];
                        NumericUtils.longToSortableBytes(upper, upperBytes, 0);
                        query = BinaryPoint.newRangeQuery("ss_value", lowerBytes, upperBytes);
                    }
                    if (VERBOSE) {
                        System.out.println(Thread.currentThread().getName() + ":  using query: " + query);
                    }
                    final BitSet hits = new BitSet();
                    s.search(query, new SimpleCollector() {

                        private int docBase;

                        @Override
                        public boolean needsScores() {
                            return false;
                        }

                        @Override
                        protected void doSetNextReader(LeafReaderContext context) throws IOException {
                            docBase = context.docBase;
                        }

                        @Override
                        public void collect(int doc) {
                            hits.set(docBase + doc);
                        }
                    });
                    if (VERBOSE) {
                        System.out.println(Thread.currentThread().getName() + ":  hitCount: " + hits.cardinality());
                    }
                    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
                    for (int docID = 0; docID < r.maxDoc(); docID++) {
                        assertEquals(docID, docIDToID.nextDoc());
                        int id = (int) docIDToID.longValue();
                        boolean expected = missing.get(id) == false && deleted.get(id) == false && values[id] >= lower && values[id] <= upper;
                        if (hits.get(docID) != expected) {
                            // We do exact quantized comparison so the bbox query should never disagree:
                            fail(Thread.currentThread().getName() + ": iter=" + iter + " id=" + id + " docID=" + docID + " value=" + values[id] + " (range: " + lower + " TO " + upper + ") expected " + expected + " but got: " + hits.get(docID) + " deleted?=" + deleted.get(id) + " query=" + query);
                        }
                    }
                }
            }
        };
        thread.setName("T" + i);
        thread.start();
        threads.add(thread);
    }
    startingGun.countDown();
    for (Thread thread : threads) {
        thread.join();
    }
    IOUtils.close(r, dir);
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) BinaryPoint(org.apache.lucene.document.BinaryPoint) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Directory(org.apache.lucene.store.Directory) BitSet(java.util.BitSet) Term(org.apache.lucene.index.Term) LongPoint(org.apache.lucene.document.LongPoint) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) NoSuchElementException(java.util.NoSuchElementException) IOException(java.io.IOException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 48 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class SimpleTextDocValuesWriter method addNumericField.

@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    assert fieldSeen(field.name);
    assert field.getDocValuesType() == DocValuesType.NUMERIC || field.hasNorms();
    writeFieldEntry(field, DocValuesType.NUMERIC);
    // first pass to find min/max
    long minValue = Long.MAX_VALUE;
    long maxValue = Long.MIN_VALUE;
    NumericDocValues values = valuesProducer.getNumeric(field);
    int numValues = 0;
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
        long v = values.longValue();
        minValue = Math.min(minValue, v);
        maxValue = Math.max(maxValue, v);
        numValues++;
    }
    if (numValues != numDocs) {
        minValue = Math.min(minValue, 0);
        maxValue = Math.max(maxValue, 0);
    }
    // write our minimum value to the .dat, all entries are deltas from that
    SimpleTextUtil.write(data, MINVALUE);
    SimpleTextUtil.write(data, Long.toString(minValue), scratch);
    SimpleTextUtil.writeNewline(data);
    // build up our fixed-width "simple text packed ints"
    // format
    BigInteger maxBig = BigInteger.valueOf(maxValue);
    BigInteger minBig = BigInteger.valueOf(minValue);
    BigInteger diffBig = maxBig.subtract(minBig);
    int maxBytesPerValue = diffBig.toString().length();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < maxBytesPerValue; i++) {
        sb.append('0');
    }
    // write our pattern to the .dat
    SimpleTextUtil.write(data, PATTERN);
    SimpleTextUtil.write(data, sb.toString(), scratch);
    SimpleTextUtil.writeNewline(data);
    final String patternString = sb.toString();
    final DecimalFormat encoder = new DecimalFormat(patternString, new DecimalFormatSymbols(Locale.ROOT));
    int numDocsWritten = 0;
    // second pass to write the values
    values = valuesProducer.getNumeric(field);
    for (int i = 0; i < numDocs; ++i) {
        if (values.docID() < i) {
            values.nextDoc();
            assert values.docID() >= i;
        }
        long value = values.docID() != i ? 0 : values.longValue();
        assert value >= minValue;
        Number delta = BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue));
        String s = encoder.format(delta);
        assert s.length() == patternString.length();
        SimpleTextUtil.write(data, s, scratch);
        SimpleTextUtil.writeNewline(data);
        if (values.docID() != i) {
            SimpleTextUtil.write(data, "F", scratch);
        } else {
            SimpleTextUtil.write(data, "T", scratch);
        }
        SimpleTextUtil.writeNewline(data);
        numDocsWritten++;
        assert numDocsWritten <= numDocs;
    }
    assert numDocs == numDocsWritten : "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten;
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) DecimalFormatSymbols(java.text.DecimalFormatSymbols) DecimalFormat(java.text.DecimalFormat) BigInteger(java.math.BigInteger)

Example 49 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class LegacyDocValuesIterables method numericIterable.

/** Converts values from {@link NumericDocValues} into {@code Iterable&lt;Number&gt;}.
   *
   * @deprecated Consume {@link NumericDocValues} instead. */
@Deprecated
public static Iterable<Number> numericIterable(final FieldInfo field, final DocValuesProducer valuesProducer, final int maxDoc) {
    return new Iterable<Number>() {

        @Override
        public Iterator<Number> iterator() {
            final NumericDocValues values;
            try {
                values = valuesProducer.getNumeric(field);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
            return new Iterator<Number>() {

                private int docIDUpto = -1;

                @Override
                public boolean hasNext() {
                    return docIDUpto + 1 < maxDoc;
                }

                @Override
                public Number next() {
                    docIDUpto++;
                    if (docIDUpto > values.docID()) {
                        try {
                            values.nextDoc();
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    }
                    Number result;
                    if (docIDUpto == values.docID()) {
                        try {
                            result = values.longValue();
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    } else {
                        result = null;
                    }
                    return result;
                }
            };
        }
    };
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Iterator(java.util.Iterator) IOException(java.io.IOException)

Example 50 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

the class LegacyDocValuesIterables method normsIterable.

/** Converts norms into {@code Iterable&lt;Number&gt;}.
   *
   * @deprecated Consume {@link NumericDocValues} instead. */
@Deprecated
public static Iterable<Number> normsIterable(final FieldInfo field, final NormsProducer normsProducer, final int maxDoc) {
    return new Iterable<Number>() {

        @Override
        public Iterator<Number> iterator() {
            final NumericDocValues values;
            try {
                values = normsProducer.getNorms(field);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
            return new Iterator<Number>() {

                private int docIDUpto = -1;

                @Override
                public boolean hasNext() {
                    return docIDUpto + 1 < maxDoc;
                }

                @Override
                public Number next() {
                    docIDUpto++;
                    if (docIDUpto > values.docID()) {
                        try {
                            values.nextDoc();
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    }
                    Number result;
                    if (docIDUpto == values.docID()) {
                        try {
                            result = values.longValue();
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    } else {
                        // Unlike NumericDocValues, norms used to return 0 for missing values:
                        result = 0;
                    }
                    return result;
                }
            };
        }
    };
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Iterator(java.util.Iterator) IOException(java.io.IOException)

Aggregations

NumericDocValues (org.apache.lucene.index.NumericDocValues)81 Document (org.apache.lucene.document.Document)30 Directory (org.apache.lucene.store.Directory)29 LeafReader (org.apache.lucene.index.LeafReader)25 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)25 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)23 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)22 SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues)22 IOException (java.io.IOException)20 BytesRef (org.apache.lucene.util.BytesRef)19 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)17 HashSet (java.util.HashSet)16 Bits (org.apache.lucene.util.Bits)16 DirectoryReader (org.apache.lucene.index.DirectoryReader)15 SortedDocValues (org.apache.lucene.index.SortedDocValues)15 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)14 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)13 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)12 IndexReader (org.apache.lucene.index.IndexReader)12 Term (org.apache.lucene.index.Term)12