Search in sources :

Example 21 with SortedNumericDocValues

use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.

the class TestLucene70DocValuesFormat method doTestSparseDocValuesVsStoredFields.

/**
 * Indexes documents that sparsely carry doc values of every type (numeric, sorted, binary,
 * sorted-numeric and sorted-set) alongside stored fields holding the same values, then walks
 * every leaf of the resulting reader and checks that the doc values iterators visit exactly the
 * documents that have stored values and expose values matching those stored fields.
 *
 * @throws Exception if indexing or reading fails
 */
private void doTestSparseDocValuesVsStoredFields() throws Exception {
    // pool of random longs that every indexed value is drawn from
    final long[] values = new long[TestUtil.nextInt(random(), 1, 500)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = random().nextLong();
    }
    Directory dir = newFSDirectory(createTempDir());
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    conf.setMergeScheduler(new SerialMergeScheduler());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // sparse compression is only enabled if less than 1% of docs have a value
    final int avgGap = 100;
    final int numDocs = atLeast(200);
    // leading gap: start the index with a random run of empty documents
    for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
        writer.addDocument(new Document());
    }
    // either every document has exactly one multi-valued entry, or up to a random limit in [2, 5]
    final int maxNumValuesPerDoc = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 5);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        // single-valued
        long docValue = values[random().nextInt(values.length)];
        doc.add(new NumericDocValuesField("numeric", docValue));
        doc.add(new SortedDocValuesField("sorted", new BytesRef(Long.toString(docValue))));
        doc.add(new BinaryDocValuesField("binary", new BytesRef(Long.toString(docValue))));
        // stored copy used later as the expected value for the single-valued fields
        doc.add(new StoredField("value", docValue));
        // multi-valued
        final int numValues = TestUtil.nextInt(random(), 1, maxNumValuesPerDoc);
        for (int j = 0; j < numValues; ++j) {
            docValue = values[random().nextInt(values.length)];
            doc.add(new SortedNumericDocValuesField("sorted_numeric", docValue));
            doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Long.toString(docValue))));
            // stored copies used later as the expected values for the multi-valued fields
            doc.add(new StoredField("values", docValue));
        }
        writer.addDocument(doc);
        // add a gap
        for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
            writer.addDocument(new Document());
        }
    }
    // randomly collapse the index to a single segment
    if (random().nextBoolean()) {
        writer.forceMerge(1);
    }
    final IndexReader indexReader = writer.getReader();
    writer.close();
    for (LeafReaderContext context : indexReader.leaves()) {
        final LeafReader reader = context.reader();
        final NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
        final SortedDocValues sorted = DocValues.getSorted(reader, "sorted");
        final BinaryDocValues binary = DocValues.getBinary(reader, "binary");
        final SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(reader, "sorted_numeric");
        final SortedSetDocValues sortedSet = DocValues.getSortedSet(reader, "sorted_set");
        // visit every doc in order; the iterators are only advanced on docs that have stored values
        for (int i = 0; i < reader.maxDoc(); ++i) {
            final Document doc = reader.document(i);
            final IndexableField valueField = doc.getField("value");
            final Long value = valueField == null ? null : valueField.numericValue().longValue();
            if (value == null) {
                // no stored value: the numeric iterator must still be positioned before this doc
                assertTrue(numeric.docID() + " vs " + i, numeric.docID() < i);
            } else {
                // stored value present: each single-valued iterator must land exactly on this doc
                assertEquals(i, numeric.nextDoc());
                assertEquals(i, binary.nextDoc());
                assertEquals(i, sorted.nextDoc());
                assertEquals(value.longValue(), numeric.longValue());
                assertTrue(sorted.ordValue() >= 0);
                assertEquals(new BytesRef(Long.toString(value)), sorted.lookupOrd(sorted.ordValue()));
                assertEquals(new BytesRef(Long.toString(value)), binary.binaryValue());
            }
            final IndexableField[] valuesFields = doc.getFields("values");
            if (valuesFields.length == 0) {
                // no stored values: the sorted-numeric iterator must still be positioned before this doc
                assertTrue(sortedNumeric.docID() + " vs " + i, sortedNumeric.docID() < i);
            } else {
                // distinct expected values for this doc, taken from the stored fields
                final Set<Long> valueSet = new HashSet<>();
                for (IndexableField sf : valuesFields) {
                    valueSet.add(sf.numericValue().longValue());
                }
                assertEquals(i, sortedNumeric.nextDoc());
                // sorted-numeric is compared against the raw stored-field count (duplicates included)
                assertEquals(valuesFields.length, sortedNumeric.docValueCount());
                for (int j = 0; j < sortedNumeric.docValueCount(); ++j) {
                    assertTrue(valueSet.contains(sortedNumeric.nextValue()));
                }
                assertEquals(i, sortedSet.nextDoc());
                int sortedSetCount = 0;
                while (true) {
                    long ord = sortedSet.nextOrd();
                    if (ord == SortedSetDocValues.NO_MORE_ORDS) {
                        break;
                    }
                    assertTrue(valueSet.contains(Long.parseLong(sortedSet.lookupOrd(ord).utf8ToString())));
                    sortedSetCount++;
                }
                // sorted-set is compared against the number of distinct stored values
                assertEquals(valueSet.size(), sortedSetCount);
            }
        }
    }
    indexReader.close();
    dir.close();
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Document(org.apache.lucene.document.Document) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) LeafReader(org.apache.lucene.index.LeafReader) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValues(org.apache.lucene.index.SortedDocValues) IndexableField(org.apache.lucene.index.IndexableField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexReader(org.apache.lucene.index.IndexReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 22 with SortedNumericDocValues

use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.

the class LegacyDocValuesIterables method sortedNumericToValues.

/** Converts all concatenated values (in docID order) from {@link SortedNumericDocValues} into {@code Iterable&lt;Number&gt;}.
   *
   * <p>Each call to {@code iterator()} pulls a fresh {@link SortedNumericDocValues} from the
   * producer. Any {@link IOException} raised while reading is rethrown wrapped in a
   * {@link RuntimeException}.
   *
   * @param valuesProducer producer the sorted-numeric doc values are obtained from
   * @param fieldInfo field whose values are iterated
   * @return an iterable over every value of every document, in docID order
   * @deprecated Consume {@link SortedNumericDocValues} instead. */
@Deprecated
public static Iterable<Number> sortedNumericToValues(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo) {
    return new Iterable<Number>() {

        @Override
        public Iterator<Number> iterator() {
            final SortedNumericDocValues values;
            try {
                values = valuesProducer.getSortedNumeric(fieldInfo);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
            return new Iterator<Number>() {

                // true while nextValue holds a value that next() has not handed out yet
                private boolean nextIsSet;

                // sticky: set once nextDoc() has returned NO_MORE_DOCS, so the underlying
                // doc values are never touched again after that point
                private boolean exhausted;

                // number of values of the current document
                private int nextCount;

                // number of values of the current document already consumed
                private int upto;

                private long nextValue;

                /** Buffers the next value, if any, unless one is already buffered or the values are exhausted. */
                private void setNext() {
                    if (nextIsSet || exhausted) {
                        return;
                    }
                    try {
                        if (upto == nextCount) {
                            // current document fully consumed: move on to the next one
                            values.nextDoc();
                            if (values.docID() == NO_MORE_DOCS) {
                                exhausted = true;
                                nextCount = 0;
                                upto = 0;
                                return;
                            }
                            nextCount = values.docValueCount();
                            upto = 0;
                        }
                        nextValue = values.nextValue();
                        upto++;
                        nextIsSet = true;
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                }

                @Override
                public boolean hasNext() {
                    setNext();
                    return nextIsSet;
                }

                @Override
                public Number next() {
                    setNext();
                    if (nextIsSet == false) {
                        // honor the Iterator contract instead of returning a stale value
                        throw new java.util.NoSuchElementException();
                    }
                    nextIsSet = false;
                    return nextValue;
                }
            };
        }
    };
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Iterator(java.util.Iterator) IOException(java.io.IOException)

Example 23 with SortedNumericDocValues

use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.

the class SortedNumericSelector method wrap.

/**
   * Exposes a multi-valued SortedNumericDocValues as a single-valued {@link NumericDocValues},
   * picking one value per document according to {@code selector} and decoding it according to
   * {@code numericType}.
   *
   * @param sortedNumeric the multi-valued source
   * @param selector which value of each document to expose (MIN or MAX)
   * @param numericType one of INT, LONG, FLOAT or DOUBLE
   * @return the single-valued view
   * @throws IllegalArgumentException if {@code numericType} is not one of the four numeric types
   */
public static NumericDocValues wrap(SortedNumericDocValues sortedNumeric, Type selector, SortField.Type numericType) {
    final boolean isNumeric = numericType == SortField.Type.INT
        || numericType == SortField.Type.LONG
        || numericType == SortField.Type.FLOAT
        || numericType == SortField.Type.DOUBLE;
    if (isNumeric == false) {
        throw new IllegalArgumentException("numericType must be a numeric type");
    }

    final NumericDocValues unwrapped = DocValues.unwrapSingleton(sortedNumeric);
    final NumericDocValues selected;
    if (unwrapped == null) {
        // genuinely multi-valued: pick the requested value per document
        switch (selector) {
            case MIN:
                selected = new MinValue(sortedNumeric);
                break;
            case MAX:
                selected = new MaxValue(sortedNumeric);
                break;
            default:
                throw new AssertionError();
        }
    } else {
        // indexed as multi-valued but single-valued in practice: whatever the selector,
        // the underlying single-valued doc values can be used directly
        selected = unwrapped;
    }

    // undo the NumericUtils sortable encoding for floating point types
    if (numericType == SortField.Type.FLOAT) {
        return new FilterNumericDocValues(selected) {

            @Override
            public long longValue() throws IOException {
                final int sortableBits = (int) in.longValue();
                return NumericUtils.sortableFloatBits(sortableBits);
            }
        };
    }
    if (numericType == SortField.Type.DOUBLE) {
        return new FilterNumericDocValues(selected) {

            @Override
            public long longValue() throws IOException {
                final long sortableBits = in.longValue();
                return NumericUtils.sortableDoubleBits(sortableBits);
            }
        };
    }
    // INT and LONG need no decoding
    return selected;
}
Also used : FilterNumericDocValues(org.apache.lucene.index.FilterNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) FilterNumericDocValues(org.apache.lucene.index.FilterNumericDocValues)

Example 24 with SortedNumericDocValues

use of org.apache.lucene.index.SortedNumericDocValues in project elasticsearch by elastic.

the class GeoHashGridAggregator method getLeafCollector.

/**
 * Builds the per-segment collector: for each collected document, every distinct consecutive
 * value of the values source is registered in {@code bucketOrds} and the document is collected
 * into the corresponding bucket.
 */
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
    final SortedNumericDocValues longValues = valuesSource.longValues(ctx);
    return new LeafBucketCollectorBase(sub, null) {

        @Override
        public void collect(int doc, long bucket) throws IOException {
            assert bucket == 0;
            longValues.setDocument(doc);
            final int count = longValues.count();
            long last = Long.MAX_VALUE;
            for (int idx = 0; idx < count; ++idx) {
                final long current = longValues.valueAt(idx);
                // skip a value equal to the one just handled; the first value is never skipped
                if (idx != 0 && last == current) {
                    continue;
                }
                final long added = bucketOrds.add(current);
                if (added >= 0) {
                    collectBucket(sub, doc, added);
                } else {
                    // already seen: a negative result encodes the existing ordinal as -1 - ordinal
                    collectExistingBucket(sub, doc, -1 - added);
                }
                last = current;
            }
        }
    };
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) LeafBucketCollectorBase(org.elasticsearch.search.aggregations.LeafBucketCollectorBase)

Example 25 with SortedNumericDocValues

use of org.apache.lucene.index.SortedNumericDocValues in project elasticsearch by elastic.

the class Versions method loadSeqNo.

/**
     * Looks up the sequence number of the document identified by the given uid term.
     * When several live documents in a segment match the term, the one with the highest
     * docID is used.
     *
     * @return the sequence number, or {@code SequenceNumbersService.UNASSIGNED_SEQ_NO}
     *         if no live document matches
     */
public static long loadSeqNo(IndexReader reader, Term term) throws IOException {
    assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid";
    final List<LeafReaderContext> segments = reader.leaves();
    if (segments.isEmpty()) {
        return SequenceNumbersService.UNASSIGNED_SEQ_NO;
    }
    // scan the segments from last to first: matches are likely to be in the last segments
    for (int ord = segments.size() - 1; ord >= 0; ord--) {
        final LeafReader segment = segments.get(ord).reader();
        final Bits live = segment.getLiveDocs();
        final Fields fields = segment.fields();
        if (fields == null) {
            continue;
        }
        final Terms uidTerms = fields.terms(UidFieldMapper.NAME);
        if (uidTerms == null) {
            continue;
        }
        final TermsEnum uidEnum = uidTerms.iterator();
        assert uidEnum != null;
        final SortedNumericDocValues seqNos = segment.getSortedNumericDocValues(SeqNoFieldMapper.NAME);
        assert seqNos != null;
        final BytesRef uid = term.bytes();
        if (uidEnum.seekExact(uid) == false) {
            continue;
        }
        // there may be more than one matching docID, in the
        // case of nested docs, so we want the last one:
        final PostingsEnum postings = uidEnum.postings(null, 0);
        int lastLiveDoc = DocIdSetIterator.NO_MORE_DOCS;
        int candidate = postings.nextDoc();
        while (candidate != DocIdSetIterator.NO_MORE_DOCS) {
            // a null liveDocs means there is no per-document liveness filter to apply
            if (live == null || live.get(candidate)) {
                lastLiveDoc = candidate;
            }
            candidate = postings.nextDoc();
        }
        if (lastLiveDoc == DocIdSetIterator.NO_MORE_DOCS) {
            // every match in this segment was filtered out; try the previous segment
            continue;
        }
        seqNos.setDocument(lastLiveDoc);
        assert seqNos.count() == 1 : "expected only a single value for _seq_no but got " + seqNos.count();
        return seqNos.valueAt(0);
    }
    return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Fields(org.apache.lucene.index.Fields) LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Aggregations

SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues) 44, LeafReaderContext (org.apache.lucene.index.LeafReaderContext) 23, NumericDocValues (org.apache.lucene.index.NumericDocValues) 13, LeafReader (org.apache.lucene.index.LeafReader) 10, BytesRef (org.apache.lucene.util.BytesRef) 7, Document (org.apache.lucene.document.Document) 6, SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField) 6, Directory (org.apache.lucene.store.Directory) 6, IndexService (org.elasticsearch.index.IndexService) 6, Searcher (org.elasticsearch.index.engine.Engine.Searcher) 6, QueryShardContext (org.elasticsearch.index.query.QueryShardContext) 6, IOException (java.io.IOException) 5, BinaryDocValues (org.apache.lucene.index.BinaryDocValues) 5, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter) 5, SortedDocValues (org.apache.lucene.index.SortedDocValues) 5, SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues) 5, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer) 4, DirectoryReader (org.apache.lucene.index.DirectoryReader) 4, IndexWriter (org.apache.lucene.index.IndexWriter) 4, BitSet (org.apache.lucene.util.BitSet) 4