use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class TestLucene70DocValuesFormat method doTestSparseDocValuesVsStoredFields.
private void doTestSparseDocValuesVsStoredFields() throws Exception {
final long[] values = new long[TestUtil.nextInt(random(), 1, 500)];
for (int i = 0; i < values.length; ++i) {
values[i] = random().nextLong();
}
Directory dir = newFSDirectory(createTempDir());
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMergeScheduler(new SerialMergeScheduler());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// sparse compression is only enabled if less than 1% of docs have a value
final int avgGap = 100;
final int numDocs = atLeast(200);
for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
writer.addDocument(new Document());
}
final int maxNumValuesPerDoc = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 5);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
// single-valued
long docValue = values[random().nextInt(values.length)];
doc.add(new NumericDocValuesField("numeric", docValue));
doc.add(new SortedDocValuesField("sorted", new BytesRef(Long.toString(docValue))));
doc.add(new BinaryDocValuesField("binary", new BytesRef(Long.toString(docValue))));
doc.add(new StoredField("value", docValue));
// multi-valued
final int numValues = TestUtil.nextInt(random(), 1, maxNumValuesPerDoc);
for (int j = 0; j < numValues; ++j) {
docValue = values[random().nextInt(values.length)];
doc.add(new SortedNumericDocValuesField("sorted_numeric", docValue));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Long.toString(docValue))));
doc.add(new StoredField("values", docValue));
}
writer.addDocument(doc);
// add a gap
for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
writer.addDocument(new Document());
}
}
if (random().nextBoolean()) {
writer.forceMerge(1);
}
final IndexReader indexReader = writer.getReader();
writer.close();
for (LeafReaderContext context : indexReader.leaves()) {
final LeafReader reader = context.reader();
final NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
final SortedDocValues sorted = DocValues.getSorted(reader, "sorted");
final BinaryDocValues binary = DocValues.getBinary(reader, "binary");
final SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(reader, "sorted_numeric");
final SortedSetDocValues sortedSet = DocValues.getSortedSet(reader, "sorted_set");
for (int i = 0; i < reader.maxDoc(); ++i) {
final Document doc = reader.document(i);
final IndexableField valueField = doc.getField("value");
final Long value = valueField == null ? null : valueField.numericValue().longValue();
if (value == null) {
assertTrue(numeric.docID() + " vs " + i, numeric.docID() < i);
} else {
assertEquals(i, numeric.nextDoc());
assertEquals(i, binary.nextDoc());
assertEquals(i, sorted.nextDoc());
assertEquals(value.longValue(), numeric.longValue());
assertTrue(sorted.ordValue() >= 0);
assertEquals(new BytesRef(Long.toString(value)), sorted.lookupOrd(sorted.ordValue()));
assertEquals(new BytesRef(Long.toString(value)), binary.binaryValue());
}
final IndexableField[] valuesFields = doc.getFields("values");
if (valuesFields.length == 0) {
assertTrue(sortedNumeric.docID() + " vs " + i, sortedNumeric.docID() < i);
} else {
final Set<Long> valueSet = new HashSet<>();
for (IndexableField sf : valuesFields) {
valueSet.add(sf.numericValue().longValue());
}
assertEquals(i, sortedNumeric.nextDoc());
assertEquals(valuesFields.length, sortedNumeric.docValueCount());
for (int j = 0; j < sortedNumeric.docValueCount(); ++j) {
assertTrue(valueSet.contains(sortedNumeric.nextValue()));
}
assertEquals(i, sortedSet.nextDoc());
int sortedSetCount = 0;
while (true) {
long ord = sortedSet.nextOrd();
if (ord == SortedSetDocValues.NO_MORE_ORDS) {
break;
}
assertTrue(valueSet.contains(Long.parseLong(sortedSet.lookupOrd(ord).utf8ToString())));
sortedSetCount++;
}
assertEquals(valueSet.size(), sortedSetCount);
}
}
}
indexReader.close();
dir.close();
}
use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class LegacyDocValuesIterables method sortedNumericToValues.
/** Converts all concatenated values (in docID order) from {@link SortedNumericDocValues} into {@code Iterable<Number>}.
*
* @deprecated Consume {@link SortedDocValues} instead. */
@Deprecated
public static Iterable<Number> sortedNumericToValues(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo) {
return new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
final SortedNumericDocValues values;
try {
values = valuesProducer.getSortedNumeric(fieldInfo);
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
return new Iterator<Number>() {
private boolean nextIsSet;
private int nextCount;
private int upto;
private long nextValue;
private void setNext() {
try {
if (nextIsSet == false) {
if (upto == nextCount) {
values.nextDoc();
if (values.docID() == NO_MORE_DOCS) {
nextCount = 0;
nextIsSet = false;
return;
} else {
nextCount = values.docValueCount();
}
upto = 0;
}
nextValue = values.nextValue();
upto++;
nextIsSet = true;
}
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
@Override
public boolean hasNext() {
setNext();
return nextCount != 0;
}
@Override
public Number next() {
setNext();
assert nextCount != 0;
nextIsSet = false;
return nextValue;
}
};
}
};
}
use of org.apache.lucene.index.SortedNumericDocValues in project lucene-solr by apache.
the class SortedNumericSelector method wrap.
/**
* Wraps a multi-valued SortedNumericDocValues as a single-valued view, using the specified selector
* and numericType.
*/
public static NumericDocValues wrap(SortedNumericDocValues sortedNumeric, Type selector, SortField.Type numericType) {
if (numericType != SortField.Type.INT && numericType != SortField.Type.LONG && numericType != SortField.Type.FLOAT && numericType != SortField.Type.DOUBLE) {
throw new IllegalArgumentException("numericType must be a numeric type");
}
final NumericDocValues view;
NumericDocValues singleton = DocValues.unwrapSingleton(sortedNumeric);
if (singleton != null) {
// it's actually single-valued in practice, but indexed as multi-valued,
// so just sort on the underlying single-valued dv directly.
// regardless of selector type, this optimization is safe!
view = singleton;
} else {
switch(selector) {
case MIN:
view = new MinValue(sortedNumeric);
break;
case MAX:
view = new MaxValue(sortedNumeric);
break;
default:
throw new AssertionError();
}
}
// undo the numericutils sortability
switch(numericType) {
case FLOAT:
return new FilterNumericDocValues(view) {
@Override
public long longValue() throws IOException {
return NumericUtils.sortableFloatBits((int) in.longValue());
}
};
case DOUBLE:
return new FilterNumericDocValues(view) {
@Override
public long longValue() throws IOException {
return NumericUtils.sortableDoubleBits(in.longValue());
}
};
default:
return view;
}
}
use of org.apache.lucene.index.SortedNumericDocValues in project elasticsearch by elastic.
the class GeoHashGridAggregator method getLeafCollector.
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
final SortedNumericDocValues values = valuesSource.longValues(ctx);
return new LeafBucketCollectorBase(sub, null) {
@Override
public void collect(int doc, long bucket) throws IOException {
assert bucket == 0;
values.setDocument(doc);
final int valuesCount = values.count();
long previous = Long.MAX_VALUE;
for (int i = 0; i < valuesCount; ++i) {
final long val = values.valueAt(i);
if (previous != val || i == 0) {
long bucketOrdinal = bucketOrds.add(val);
if (bucketOrdinal < 0) {
// already seen
bucketOrdinal = -1 - bucketOrdinal;
collectExistingBucket(sub, doc, bucketOrdinal);
} else {
collectBucket(sub, doc, bucketOrdinal);
}
previous = val;
}
}
}
};
}
use of org.apache.lucene.index.SortedNumericDocValues in project elasticsearch by elastic.
the class Versions method loadSeqNo.
/**
* Returns the sequence number for the given uid term, returning
* {@code SequenceNumbersService.UNASSIGNED_SEQ_NO} if none is found.
*/
public static long loadSeqNo(IndexReader reader, Term term) throws IOException {
assert term.field().equals(UidFieldMapper.NAME) : "can only load _seq_no by uid";
List<LeafReaderContext> leaves = reader.leaves();
if (leaves.isEmpty()) {
return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}
// which are likely to be in the last segments
for (int i = leaves.size() - 1; i >= 0; i--) {
LeafReader leaf = leaves.get(i).reader();
Bits liveDocs = leaf.getLiveDocs();
TermsEnum termsEnum = null;
SortedNumericDocValues dvField = null;
PostingsEnum docsEnum = null;
final Fields fields = leaf.fields();
if (fields != null) {
Terms terms = fields.terms(UidFieldMapper.NAME);
if (terms != null) {
termsEnum = terms.iterator();
assert termsEnum != null;
dvField = leaf.getSortedNumericDocValues(SeqNoFieldMapper.NAME);
assert dvField != null;
final BytesRef id = term.bytes();
if (termsEnum.seekExact(id)) {
// there may be more than one matching docID, in the
// case of nested docs, so we want the last one:
docsEnum = termsEnum.postings(docsEnum, 0);
int docID = DocIdSetIterator.NO_MORE_DOCS;
for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
if (liveDocs != null && liveDocs.get(d) == false) {
continue;
}
docID = d;
}
if (docID != DocIdSetIterator.NO_MORE_DOCS) {
dvField.setDocument(docID);
assert dvField.count() == 1 : "expected only a single value for _seq_no but got " + dvField.count();
return dvField.valueAt(0);
}
}
}
}
}
return SequenceNumbersService.UNASSIGNED_SEQ_NO;
}
Aggregations