Examples with NumericDocValues - org.apache.lucene.index.NumericDocValues

Example 11 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

From the class TestPointQueries, method verifyLongs:

// Randomized verification of point range queries over long values.
//
// Indexes the given values both as LongPoint ("sn_value") and as BinaryPoint
// ("ss_value", using the sortable-bytes encoding of the same long), randomly
// leaving some ids without a value and randomly deleting some documents. It then
// starts several threads that each run random range queries and compare the
// collected hits against the expectation computed directly from the values,
// missing and deleted bit sets.
//
// values: the long values to index.
// ids:    optional mapping from ordinal to document id; when null the ordinal is
//         used as the id. Consecutive entries with the same id are added to the
//         same document (a new Document is only started when the id changes).
private static void verifyLongs(long[] values, int[] ids) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else we can get O(N^2) merging:
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < values.length / 100) {
        iwc.setMaxBufferedDocs(values.length / 100);
    }
    iwc.setCodec(getCodec());
    Directory dir;
    // Large inputs go to a filesystem directory under a temp dir; smaller ones use
    // whatever newMaybeVirusCheckingDirectory() picks.
    if (values.length > 100000) {
        dir = newMaybeVirusCheckingFSDirectory(createTempDir("TestRangeTree"));
    } else {
        dir = newMaybeVirusCheckingDirectory();
    }
    // Percent chance (0..99) that an id gets no value / that a delete is issued
    // after a document is added.
    int missingPct = random().nextInt(100);
    int deletedPct = random().nextInt(100);
    if (VERBOSE) {
        System.out.println("  missingPct=" + missingPct);
        System.out.println("  deletedPct=" + deletedPct);
    }
    // Bit i set => id i was indexed without point values / id i was deleted.
    BitSet missing = new BitSet();
    BitSet deleted = new BitSet();
    // Document currently being built; flushed to the writer when the id changes.
    Document doc = null;
    int lastID = -1;
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    for (int ord = 0; ord < values.length; ord++) {
        int id;
        if (ids == null) {
            id = ord;
        } else {
            id = ids[ord];
        }
        if (id != lastID) {
            // New id: decide whether it is "missing", flush the previous document,
            // then start a fresh one.
            if (random().nextInt(100) < missingPct) {
                missing.set(id);
                if (VERBOSE) {
                    System.out.println("  missing id=" + id);
                }
            }
            if (doc != null) {
                w.addDocument(doc);
                if (random().nextInt(100) < deletedPct) {
                    // Picks a random id in [0, id) to delete.
                    // NOTE(review): nextInt(id) needs id > 0; presumably ids are
                    // ascending so id >= 1 once a previous doc exists - confirm.
                    int idToDelete = random().nextInt(id);
                    w.deleteDocuments(new Term("id", "" + idToDelete));
                    deleted.set(idToDelete);
                    if (VERBOSE) {
                        System.out.println("  delete id=" + idToDelete);
                    }
                }
            }
            doc = new Document();
            // The id is indexed as a term (used for deletes above) and as a numeric
            // doc value (used to map docID back to id during verification below).
            doc.add(newStringField("id", "" + id, Field.Store.NO));
            doc.add(new NumericDocValuesField("id", id));
            lastID = id;
        }
        if (missing.get(id) == false) {
            // NOTE(review): this indexes values[id], not values[ord]; the
            // verification below also reads values[id], so the two agree - confirm
            // this is intended when ids contains repeated entries.
            doc.add(new LongPoint("sn_value", values[id]));
            byte[] bytes = new byte[8];
            NumericUtils.longToSortableBytes(values[id], bytes, 0);
            doc.add(new BinaryPoint("ss_value", bytes));
        }
    }
    // Flush the last document built by the loop.
    // NOTE(review): doc is still null here if values is empty - confirm callers
    // never pass an empty array.
    w.addDocument(doc);
    if (random().nextBoolean()) {
        if (VERBOSE) {
            System.out.println("  forceMerge(1)");
        }
        w.forceMerge(1);
    }
    final IndexReader r = w.getReader();
    w.close();
    IndexSearcher s = newSearcher(r, false);
    int numThreads = TestUtil.nextInt(random(), 2, 5);
    if (VERBOSE) {
        System.out.println("TEST: use " + numThreads + " query threads; searcher=" + s);
    }
    List<Thread> threads = new ArrayList<>();
    final int iters = atLeast(100);
    // All threads block on this latch so that they start querying together.
    final CountDownLatch startingGun = new CountDownLatch(1);
    // Set by a thread that hit an Exception; the other threads stop iterating once
    // they observe it.
    final AtomicBoolean failed = new AtomicBoolean();
    for (int i = 0; i < numThreads; i++) {
        Thread thread = new Thread() {

            @Override
            public void run() {
                try {
                    _run();
                } catch (Exception e) {
                    // NOTE(review): only Exception is caught; if fail()/assertEquals
                    // signal failure with an Error, "failed" is not set here and
                    // the main thread never inspects "failed" after join() -
                    // confirm how failures reach the test runner.
                    failed.set(true);
                    throw new RuntimeException(e);
                }
            }

            // Runs up to "iters" random range queries and checks every document.
            private void _run() throws Exception {
                startingGun.await();
                for (int iter = 0; iter < iters && failed.get() == false; iter++) {
                    Long lower = randomValue();
                    Long upper = randomValue();
                    // Normalize so that lower <= upper.
                    if (upper < lower) {
                        long x = lower;
                        lower = upper;
                        upper = x;
                    }
                    Query query;
                    if (VERBOSE) {
                        System.out.println("\n" + Thread.currentThread().getName() + ": TEST: iter=" + iter + " value=" + lower + " TO " + upper);
                        byte[] tmp = new byte[8];
                        // NOTE(review): lower/upper were already unboxed by the
                        // comparison above, so these null checks look redundant.
                        if (lower != null) {
                            NumericUtils.longToSortableBytes(lower, tmp, 0);
                            System.out.println("  lower bytes=" + Arrays.toString(tmp));
                        }
                        if (upper != null) {
                            NumericUtils.longToSortableBytes(upper, tmp, 0);
                            System.out.println("  upper bytes=" + Arrays.toString(tmp));
                        }
                    }
                    // Randomly query either the LongPoint field or the BinaryPoint
                    // field holding the sortable-bytes form of the same values.
                    if (random().nextBoolean()) {
                        query = LongPoint.newRangeQuery("sn_value", lower, upper);
                    } else {
                        byte[] lowerBytes = new byte[8];
                        NumericUtils.longToSortableBytes(lower, lowerBytes, 0);
                        byte[] upperBytes = new byte[8];
                        NumericUtils.longToSortableBytes(upper, upperBytes, 0);
                        query = BinaryPoint.newRangeQuery("ss_value", lowerBytes, upperBytes);
                    }
                    if (VERBOSE) {
                        System.out.println(Thread.currentThread().getName() + ":  using query: " + query);
                    }
                    // Collect every matching doc as a top-level docID (segment
                    // docBase + segment-local doc).
                    final BitSet hits = new BitSet();
                    s.search(query, new SimpleCollector() {

                        private int docBase;

                        @Override
                        public boolean needsScores() {
                            return false;
                        }

                        @Override
                        protected void doSetNextReader(LeafReaderContext context) throws IOException {
                            docBase = context.docBase;
                        }

                        @Override
                        public void collect(int doc) {
                            hits.set(docBase + doc);
                        }
                    });
                    if (VERBOSE) {
                        System.out.println(Thread.currentThread().getName() + ":  hitCount: " + hits.cardinality());
                    }
                    // Walk every docID, recover its id from the "id" doc values and
                    // compare the query result with the expected outcome. The
                    // assertEquals requires every docID to carry an "id" value.
                    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
                    for (int docID = 0; docID < r.maxDoc(); docID++) {
                        assertEquals(docID, docIDToID.nextDoc());
                        int id = (int) docIDToID.longValue();
                        // A doc should match iff it has a value, was not deleted,
                        // and its value lies in [lower, upper].
                        boolean expected = missing.get(id) == false && deleted.get(id) == false && values[id] >= lower && values[id] <= upper;
                        if (hits.get(docID) != expected) {
                            // We do exact quantized comparison so the bbox query should never disagree:
                            fail(Thread.currentThread().getName() + ": iter=" + iter + " id=" + id + " docID=" + docID + " value=" + values[id] + " (range: " + lower + " TO " + upper + ") expected " + expected + " but got: " + hits.get(docID) + " deleted?=" + deleted.get(id) + " query=" + query);
                        }
                    }
                }
            }
        };
        thread.setName("T" + i);
        thread.start();
        threads.add(thread);
    }
    // Release all query threads at once, then wait for them to finish.
    startingGun.countDown();
    for (Thread thread : threads) {
        thread.join();
    }
    IOUtils.close(r, dir);
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) BinaryPoint(org.apache.lucene.document.BinaryPoint) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Directory(org.apache.lucene.store.Directory) BitSet(java.util.BitSet) Term(org.apache.lucene.index.Term) LongPoint(org.apache.lucene.document.LongPoint) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) NoSuchElementException(java.util.NoSuchElementException) IOException(java.io.IOException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 12 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

From the class SimpleTextDocValuesWriter, method addNumericField:

/**
 * Writes a numeric doc-values field as text.
 *
 * <p>The values are scanned once to find their minimum and maximum. The minimum is
 * written first, followed by a fixed-width pattern of zeros as wide as the decimal
 * form of {@code max - min}. A second scan then writes, for each of the
 * {@code numDocs} documents, its delta from the minimum formatted with that pattern
 * on one line and a {@code T}/{@code F} flag on the next line telling whether the
 * document actually had a value. Documents without a value are written as value 0.
 *
 * @param field the field being written
 * @param valuesProducer supplies a fresh {@link NumericDocValues} iterator per pass
 * @throws IOException if reading the values or writing the output fails
 */
@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    assert fieldSeen(field.name);
    assert field.getDocValuesType() == DocValuesType.NUMERIC || field.hasNorms();
    writeFieldEntry(field, DocValuesType.NUMERIC);

    // Pass 1: determine the value range and how many documents carry a value.
    long lowest = Long.MAX_VALUE;
    long highest = Long.MIN_VALUE;
    int docsWithValue = 0;
    NumericDocValues values = valuesProducer.getNumeric(field);
    while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        final long current = values.longValue();
        lowest = Math.min(lowest, current);
        highest = Math.max(highest, current);
        docsWithValue++;
    }
    if (docsWithValue != numDocs) {
        // Some documents have no value; they are written as 0 below, so the range
        // must include 0.
        lowest = Math.min(lowest, 0);
        highest = Math.max(highest, 0);
    }

    // The minimum goes to the .dat first; every entry is a delta from it.
    SimpleTextUtil.write(data, MINVALUE);
    SimpleTextUtil.write(data, Long.toString(lowest), scratch);
    SimpleTextUtil.writeNewline(data);

    // Fixed-width "simple text packed ints": one '0' per decimal character of the
    // largest possible delta (BigInteger avoids overflow in max - min).
    final BigInteger lowestBig = BigInteger.valueOf(lowest);
    final int width = BigInteger.valueOf(highest).subtract(lowestBig).toString().length();
    final StringBuilder zeros = new StringBuilder();
    while (zeros.length() < width) {
        zeros.append('0');
    }
    final String patternString = zeros.toString();

    // Record the pattern in the .dat as well.
    SimpleTextUtil.write(data, PATTERN);
    SimpleTextUtil.write(data, patternString, scratch);
    SimpleTextUtil.writeNewline(data);

    final DecimalFormat encoder = new DecimalFormat(patternString, new DecimalFormatSymbols(Locale.ROOT));

    // Pass 2: emit one delta line plus one T/F line per document.
    int numDocsWritten = 0;
    values = valuesProducer.getNumeric(field);
    for (int docID = 0; docID < numDocs; docID++) {
        if (values.docID() < docID) {
            values.nextDoc();
            assert values.docID() >= docID;
        }
        final boolean hasValue = values.docID() == docID;
        final long value = hasValue ? values.longValue() : 0;
        assert value >= lowest;
        final Number delta = BigInteger.valueOf(value).subtract(lowestBig);
        final String s = encoder.format(delta);
        assert s.length() == patternString.length();
        SimpleTextUtil.write(data, s, scratch);
        SimpleTextUtil.writeNewline(data);
        SimpleTextUtil.write(data, hasValue ? "T" : "F", scratch);
        SimpleTextUtil.writeNewline(data);
        numDocsWritten++;
        assert numDocsWritten <= numDocs;
    }
    assert numDocs == numDocsWritten : "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten;
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) DecimalFormatSymbols(java.text.DecimalFormatSymbols) DecimalFormat(java.text.DecimalFormat) BigInteger(java.math.BigInteger)

Example 13 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

From the class LegacyDocValuesIterables, method numericIterable:

/**
 * Converts values from {@link NumericDocValues} into {@code Iterable<Number>}.
 *
 * <p>Each call to {@code iterator()} pulls a fresh {@link NumericDocValues} from the
 * producer. The iterator yields exactly {@code maxDoc} elements, one per document
 * in docID order: the document's value, or {@code null} when the document has no
 * value.
 *
 * @param field the field whose values are exposed
 * @param valuesProducer source of the {@link NumericDocValues}
 * @param maxDoc number of documents, i.e. number of elements each iterator yields
 * @return an iterable view over the per-document values
 * @deprecated Consume {@link NumericDocValues} instead.
 */
@Deprecated
public static Iterable<Number> numericIterable(final FieldInfo field, final DocValuesProducer valuesProducer, final int maxDoc) {
    return new Iterable<Number>() {

        @Override
        public Iterator<Number> iterator() {
            final NumericDocValues values;
            try {
                values = valuesProducer.getNumeric(field);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
            return new Iterator<Number>() {

                // docID of the element most recently returned by next(); -1 before the first call.
                private int docIDUpto = -1;

                @Override
                public boolean hasNext() {
                    return docIDUpto + 1 < maxDoc;
                }

                @Override
                public Number next() {
                    // Honor the Iterator contract instead of silently returning
                    // null forever once every document has been visited.
                    if (hasNext() == false) {
                        throw new java.util.NoSuchElementException("no more documents: maxDoc=" + maxDoc);
                    }
                    docIDUpto++;
                    try {
                        // Only advance the underlying iterator when it is behind
                        // the document we are about to report.
                        if (docIDUpto > values.docID()) {
                            values.nextDoc();
                        }
                        if (docIDUpto == values.docID()) {
                            return values.longValue();
                        }
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                    // The underlying iterator is positioned past this document: no value.
                    return null;
                }
            };
        }
    };
}

Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Iterator(java.util.Iterator) IOException(java.io.IOException)

Example 14 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

From the class LegacyDocValuesIterables, method normsIterable:

/**
 * Converts norms into {@code Iterable<Number>}.
 *
 * <p>Each call to {@code iterator()} pulls a fresh {@link NumericDocValues} from the
 * norms producer. The iterator yields exactly {@code maxDoc} elements, one per
 * document in docID order: the document's norm, or {@code 0} when the document has
 * no norm.
 *
 * @param field the field whose norms are exposed
 * @param normsProducer source of the norms
 * @param maxDoc number of documents, i.e. number of elements each iterator yields
 * @return an iterable view over the per-document norms
 * @deprecated Consume {@link NumericDocValues} instead.
 */
@Deprecated
public static Iterable<Number> normsIterable(final FieldInfo field, final NormsProducer normsProducer, final int maxDoc) {
    return new Iterable<Number>() {

        @Override
        public Iterator<Number> iterator() {
            final NumericDocValues values;
            try {
                values = normsProducer.getNorms(field);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
            return new Iterator<Number>() {

                // docID of the element most recently returned by next(); -1 before the first call.
                private int docIDUpto = -1;

                @Override
                public boolean hasNext() {
                    return docIDUpto + 1 < maxDoc;
                }

                @Override
                public Number next() {
                    // Honor the Iterator contract instead of silently returning
                    // 0 forever once every document has been visited.
                    if (hasNext() == false) {
                        throw new java.util.NoSuchElementException("no more documents: maxDoc=" + maxDoc);
                    }
                    docIDUpto++;
                    try {
                        // Only advance the underlying iterator when it is behind
                        // the document we are about to report.
                        if (docIDUpto > values.docID()) {
                            values.nextDoc();
                        }
                        if (docIDUpto == values.docID()) {
                            return values.longValue();
                        }
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                    // Unlike NumericDocValues, norms used to return 0 for missing values:
                    return 0;
                }
            };
        }
    };
}

Example 15 with NumericDocValues

use of org.apache.lucene.index.NumericDocValues in project lucene-solr by apache.

From the class SortedNumericSelector, method wrap:

/**
 * Exposes a multi-valued {@link SortedNumericDocValues} as a single-valued
 * {@link NumericDocValues}, choosing one value per document with the given selector
 * and decoding it according to {@code numericType}.
 *
 * @param sortedNumeric the multi-valued source
 * @param selector which value of each document to expose ({@code MIN} or {@code MAX})
 * @param numericType must be {@code INT}, {@code LONG}, {@code FLOAT} or {@code DOUBLE}
 * @return the single-valued view
 * @throws IllegalArgumentException if {@code numericType} is not one of the four numeric types
 */
public static NumericDocValues wrap(SortedNumericDocValues sortedNumeric, Type selector, SortField.Type numericType) {
    final boolean isNumeric = numericType == SortField.Type.INT
        || numericType == SortField.Type.LONG
        || numericType == SortField.Type.FLOAT
        || numericType == SortField.Type.DOUBLE;
    if (isNumeric == false) {
        throw new IllegalArgumentException("numericType must be a numeric type");
    }

    // If the field is single-valued in practice (merely indexed as multi-valued),
    // use the underlying single-valued doc values directly; that shortcut is safe
    // whatever the selector is.
    NumericDocValues selected = DocValues.unwrapSingleton(sortedNumeric);
    if (selected == null) {
        switch (selector) {
            case MIN:
                selected = new MinValue(sortedNumeric);
                break;
            case MAX:
                selected = new MaxValue(sortedNumeric);
                break;
            default:
                throw new AssertionError();
        }
    }
    final NumericDocValues view = selected;

    // Undo the NumericUtils sortable encoding for floating-point types.
    if (numericType == SortField.Type.FLOAT) {
        return new FilterNumericDocValues(view) {

            @Override
            public long longValue() throws IOException {
                return NumericUtils.sortableFloatBits((int) in.longValue());
            }
        };
    } else if (numericType == SortField.Type.DOUBLE) {
        return new FilterNumericDocValues(view) {

            @Override
            public long longValue() throws IOException {
                return NumericUtils.sortableDoubleBits(in.longValue());
            }
        };
    } else {
        // INT and LONG need no decoding.
        return view;
    }
}

Also used : FilterNumericDocValues(org.apache.lucene.index.FilterNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) FilterNumericDocValues(org.apache.lucene.index.FilterNumericDocValues)

Aggregations

NumericDocValues (org.apache.lucene.index.NumericDocValues): 81; Document (org.apache.lucene.document.Document): 30; Directory (org.apache.lucene.store.Directory): 29; LeafReader (org.apache.lucene.index.LeafReader): 25; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 25; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 23; NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 22; SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues): 22; IOException (java.io.IOException): 20; BytesRef (org.apache.lucene.util.BytesRef): 19; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 17; HashSet (java.util.HashSet): 16; Bits (org.apache.lucene.util.Bits): 16; DirectoryReader (org.apache.lucene.index.DirectoryReader): 15; SortedDocValues (org.apache.lucene.index.SortedDocValues): 15; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 14; SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 13; BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField): 12; IndexReader (org.apache.lucene.index.IndexReader): 12; Term (org.apache.lucene.index.Term): 12