Use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
Class SimpleTextDocValuesWriter, method addSortedField:
  @Override
  public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    assert fieldSeen(field.name);
    assert field.getDocValuesType() == DocValuesType.SORTED;
    writeFieldEntry(field, DocValuesType.SORTED);

    int valueCount = 0;
    int maxLength = -1;
    TermsEnum terms = valuesProducer.getSorted(field).termsEnum();
    for (BytesRef value = terms.next(); value != null; value = terms.next()) {
      maxLength = Math.max(maxLength, value.length);
      valueCount++;
    }

    // write numValues
    SimpleTextUtil.write(data, NUMVALUES);
    SimpleTextUtil.write(data, Integer.toString(valueCount), scratch);
    SimpleTextUtil.writeNewline(data);

    // write maxLength
    SimpleTextUtil.write(data, MAXLENGTH);
    SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
    SimpleTextUtil.writeNewline(data);

    int maxBytesLength = Integer.toString(maxLength).length();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < maxBytesLength; i++) {
      sb.append('0');
    }

    // write our pattern for encoding lengths
    SimpleTextUtil.write(data, PATTERN);
    SimpleTextUtil.write(data, sb.toString(), scratch);
    SimpleTextUtil.writeNewline(data);
    final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));

    int maxOrdBytes = Long.toString(valueCount + 1L).length();
    sb.setLength(0);
    for (int i = 0; i < maxOrdBytes; i++) {
      sb.append('0');
    }

    // write our pattern for ords
    SimpleTextUtil.write(data, ORDPATTERN);
    SimpleTextUtil.write(data, sb.toString(), scratch);
    SimpleTextUtil.writeNewline(data);
    final DecimalFormat ordEncoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));

    // for asserts:
    int valuesSeen = 0;

    terms = valuesProducer.getSorted(field).termsEnum();
    for (BytesRef value = terms.next(); value != null; value = terms.next()) {
      // write length
      SimpleTextUtil.write(data, LENGTH);
      SimpleTextUtil.write(data, encoder.format(value.length), scratch);
      SimpleTextUtil.writeNewline(data);

      // write bytes -- don't use SimpleText.write
      // because it escapes:
      data.writeBytes(value.bytes, value.offset, value.length);
      // pad to fit
      for (int i = value.length; i < maxLength; i++) {
        data.writeByte((byte) ' ');
      }
      SimpleTextUtil.writeNewline(data);
      valuesSeen++;
      assert valuesSeen <= valueCount;
    }
    assert valuesSeen == valueCount;

    SortedDocValues values = valuesProducer.getSorted(field);
    for (int i = 0; i < numDocs; ++i) {
      if (values.docID() < i) {
        values.nextDoc();
        assert values.docID() >= i;
      }
      int ord = -1;
      if (values.docID() == i) {
        ord = values.ordValue();
      }
      SimpleTextUtil.write(data, ordEncoder.format(ord + 1L), scratch);
      SimpleTextUtil.writeNewline(data);
    }
  }
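
For contrast with the writer above, here is a minimal, self-contained sketch of reading sorted doc values back through the same iterator-style API (nextDoc(), ordValue(), lookupOrd()). It is only a sketch: it assumes a Lucene 7.x-era setup (RAMDirectory, a null analyzer as in the doc-values-only tests further down), and the field name "color" and its values are made up for illustration.

  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.SortedDocValuesField;
  import org.apache.lucene.index.DirectoryReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.LeafReaderContext;
  import org.apache.lucene.index.SortedDocValues;
  import org.apache.lucene.search.DocIdSetIterator;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.RAMDirectory;
  import org.apache.lucene.util.BytesRef;

  public class SortedDocValuesReadSketch {
    public static void main(String[] args) throws Exception {
      Directory dir = new RAMDirectory();
      // A null analyzer is fine here because only doc-values fields are added.
      IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null));
      for (String color : new String[] { "red", "green", "red" }) {
        Document doc = new Document();
        doc.add(new SortedDocValuesField("color", new BytesRef(color)));
        iw.addDocument(doc);
      }
      iw.close();

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        for (LeafReaderContext ctx : reader.leaves()) {
          SortedDocValues dv = ctx.reader().getSortedDocValues("color");
          if (dv == null) {
            continue; // no document in this segment has the field
          }
          // The iterator only visits documents that actually have a value.
          for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
            int ord = dv.ordValue();            // per-segment ordinal
            BytesRef value = dv.lookupOrd(ord); // ordinal back to term bytes
            System.out.println("doc " + doc + " -> ord " + ord + " -> " + value.utf8ToString());
          }
        }
      }
      dir.close();
    }
  }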
Use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
Class TestCollationDocValuesField, method doTestRanges:
  private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, BytesRef startBR, BytesRef endBR, Collator collator) throws Exception {
    SortedDocValues dvs = MultiDocValues.getSortedValues(is.getIndexReader(), "collated");
    for (int docID = 0; docID < is.getIndexReader().maxDoc(); docID++) {
      Document doc = is.doc(docID);
      String s = doc.getField("field").stringValue();
      boolean collatorAccepts = collate(collator, s, startPoint) >= 0 && collate(collator, s, endPoint) <= 0;
      assertEquals(docID, dvs.nextDoc());
      BytesRef br = dvs.binaryValue();
      boolean luceneAccepts = br.compareTo(startBR) >= 0 && br.compareTo(endBR) <= 0;
      assertEquals(startPoint + " <= " + s + " <= " + endPoint, collatorAccepts, luceneAccepts);
    }
  }
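
The comparison above only works because the bytes stored in the "collated" doc values are binary collation keys, which sort the same way the Collator compares the original strings. Here is a small sketch of producing such a key with the plain JDK Collator (the class under test, CollationDocValuesField, does this for you; the locale, strength, and sample value below are arbitrary choices for illustration):

  import java.text.Collator;
  import java.util.Locale;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.SortedDocValuesField;
  import org.apache.lucene.util.BytesRef;

  public class CollationKeySketch {
    public static void main(String[] args) {
      // Locale and strength are arbitrary for this sketch.
      Collator collator = Collator.getInstance(Locale.GERMAN);
      collator.setStrength(Collator.PRIMARY);

      // The binary collation key orders byte-wise the same way collator.compare()
      // orders the strings, so range checks on the BytesRef agree with the Collator.
      String value = "Äpfel";
      byte[] key = collator.getCollationKey(value).toByteArray();

      Document doc = new Document();
      doc.add(new SortedDocValuesField("collated", new BytesRef(key)));
      System.out.println("collation key has " + key.length + " bytes");
    }
  }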
Use of org.apache.lucene.index.SortedDocValues in project elasticsearch by elastic.
Class ParentToChildrenAggregator, method doPostCollection:
  @Override
  protected void doPostCollection() throws IOException {
    IndexReader indexReader = context().searcher().getIndexReader();
    for (LeafReaderContext ctx : indexReader.leaves()) {
      Scorer childDocsScorer = childFilter.scorer(ctx);
      if (childDocsScorer == null) {
        continue;
      }
      DocIdSetIterator childDocsIter = childDocsScorer.iterator();
      final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx);
      final SortedDocValues globalOrdinals = valuesSource.globalOrdinalsValues(parentType, ctx);
      // Set the scorer, since we now replay only the child docIds
      sub.setScorer(new ConstantScoreScorer(null, 1f, childDocsIter));
      final Bits liveDocs = ctx.reader().getLiveDocs();
      for (int docId = childDocsIter.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = childDocsIter.nextDoc()) {
        if (liveDocs != null && liveDocs.get(docId) == false) {
          continue;
        }
        long globalOrdinal = globalOrdinals.getOrd(docId);
        if (globalOrdinal != -1) {
          long bucketOrd = parentOrdToBuckets.get(globalOrdinal);
          if (bucketOrd != -1) {
            collectBucket(sub, docId, bucketOrd);
            if (multipleBucketsPerParentOrd) {
              long[] otherBucketOrds = parentOrdToOtherBuckets.get(globalOrdinal);
              if (otherBucketOrds != null) {
                for (long otherBucketOrd : otherBucketOrds) {
                  collectBucket(sub, docId, otherBucketOrd);
                }
              }
            }
          }
        }
      }
    }
  }
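
The key step above is a two-level lookup: child doc -> parent global ordinal -> bucket ordinal. Below is a simplified, standalone sketch of that bookkeeping using plain java.util collections. Every name in it is hypothetical; the real aggregator keeps these mappings in Elasticsearch's own paged array structures rather than a HashMap.

  import java.util.HashMap;
  import java.util.Map;

  public class ParentOrdToBucketSketch {

    private final Map<Long, Long> parentOrdToBucket = new HashMap<>();

    // While collecting parent documents: remember which bucket a parent's global ordinal fell into.
    public void onParentDoc(long parentGlobalOrdinal, long bucketOrd) {
      parentOrdToBucket.putIfAbsent(parentGlobalOrdinal, bucketOrd);
    }

    // While replaying child documents: resolve the child's parent ordinal to a bucket, or -1 if none.
    public long bucketForChild(long parentGlobalOrdinal) {
      return parentOrdToBucket.getOrDefault(parentGlobalOrdinal, -1L);
    }

    public static void main(String[] args) {
      ParentOrdToBucketSketch sketch = new ParentOrdToBucketSketch();
      sketch.onParentDoc(42L, 0L);                      // parent ordinal 42 belongs to bucket 0
      System.out.println(sketch.bucketForChild(42L));   // 0
      System.out.println(sketch.bucketForChild(7L));    // -1, no bucket for this parent
    }
  }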
Use of org.apache.lucene.index.SortedDocValues in project elasticsearch by elastic.
Class ReplaceMissingTests, method test:
  public void test() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwc);

    Document doc = new Document();
    doc.add(new SortedDocValuesField("field", new BytesRef("cat")));
    iw.addDocument(doc);

    doc = new Document();
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new SortedDocValuesField("field", new BytesRef("dog")));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(reader);

    SortedDocValues raw = ar.getSortedDocValues("field");
    assertEquals(2, raw.getValueCount());

    // existing values
    SortedDocValues dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("cat"));
    assertEquals(2, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(0, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("dog"));
    assertEquals(2, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(1, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    // non-existing values
    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("apple"));
    assertEquals(3, dv.getValueCount());
    assertEquals("apple", dv.lookupOrd(0).utf8ToString());
    assertEquals("cat", dv.lookupOrd(1).utf8ToString());
    assertEquals("dog", dv.lookupOrd(2).utf8ToString());
    assertEquals(1, dv.getOrd(0));
    assertEquals(0, dv.getOrd(1));
    assertEquals(2, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("company"));
    assertEquals(3, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("company", dv.lookupOrd(1).utf8ToString());
    assertEquals("dog", dv.lookupOrd(2).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(1, dv.getOrd(1));
    assertEquals(2, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("ebay"));
    assertEquals(3, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals("ebay", dv.lookupOrd(2).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(2, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    reader.close();
    dir.close();
  }
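
A compact way to see what the assertions above are checking: ReplaceMissing splices the missing value into the existing term dictionary and shifts ordinals around the insertion point. The helper below is an illustrative sketch of that remapping only; the method name and parameters are hypothetical, not the actual ReplaceMissing API.

  // rawOrd: the document's ordinal in the raw dictionary, or -1 if it has no value.
  // insertedOrd: where the missing value sorts into the raw dictionary.
  // alreadyExists: whether the missing value was already a term.
  static int remapOrd(int rawOrd, int insertedOrd, boolean alreadyExists) {
    if (rawOrd < 0) {
      return insertedOrd;          // document had no value: it gets the substitute's ordinal
    }
    if (alreadyExists || rawOrd < insertedOrd) {
      return rawOrd;               // ordinals below the insertion point are unchanged
    }
    return rawOrd + 1;             // ordinals at or above it shift to make room for the new term
  }

  // With the raw dictionary {cat=0, dog=1} and missing value "apple" (insertedOrd=0, not present):
  // remapOrd(0, 0, false) == 1 for the "cat" doc, remapOrd(-1, 0, false) == 0 for the empty doc,
  // and remapOrd(1, 0, false) == 2 for the "dog" doc, matching the asserts in the test.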
Use of org.apache.lucene.index.SortedDocValues in project elasticsearch by elastic.
Class MultiOrdinalsTests, method testRandomValues:
  public void testRandomValues() throws IOException {
    Random random = random();
    int numDocs = 100 + random.nextInt(1000);
    int numOrdinals = 1 + random.nextInt(200);
    int numValues = 100 + random.nextInt(100000);
    OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);
    Set<OrdAndId> ordsAndIdSet = new HashSet<>();
    for (int i = 0; i < numValues; i++) {
      ordsAndIdSet.add(new OrdAndId(random.nextInt(numOrdinals), random.nextInt(numDocs)));
    }
    List<OrdAndId> ordsAndIds = new ArrayList<>(ordsAndIdSet);
    Collections.sort(ordsAndIds, new Comparator<OrdAndId>() {

      @Override
      public int compare(OrdAndId o1, OrdAndId o2) {
        if (o1.ord < o2.ord) {
          return -1;
        }
        if (o1.ord == o2.ord) {
          if (o1.id < o2.id) {
            return -1;
          }
          if (o1.id > o2.id) {
            return 1;
          }
          return 0;
        }
        return 1;
      }
    });
    long lastOrd = -1;
    for (OrdAndId ordAndId : ordsAndIds) {
      if (lastOrd != ordAndId.ord) {
        lastOrd = ordAndId.ord;
        builder.nextOrdinal();
      }
      // remap the ordinals in case we have gaps?
      ordAndId.ord = builder.currentOrdinal();
      builder.addDoc(ordAndId.id);
    }
    Collections.sort(ordsAndIds, new Comparator<OrdAndId>() {

      @Override
      public int compare(OrdAndId o1, OrdAndId o2) {
        if (o1.id < o2.id) {
          return -1;
        }
        if (o1.id == o2.id) {
          if (o1.ord < o2.ord) {
            return -1;
          }
          if (o1.ord > o2.ord) {
            return 1;
          }
          return 0;
        }
        return 1;
      }
    });
    Ordinals ords = creationMultiOrdinals(builder);
    RandomAccessOrds docs = ords.ordinals();
    final SortedDocValues singleOrds = MultiValueMode.MIN.select(docs);
    int docId = ordsAndIds.get(0).id;
    List<Long> docOrds = new ArrayList<>();
    for (OrdAndId ordAndId : ordsAndIds) {
      if (docId == ordAndId.id) {
        docOrds.add(ordAndId.ord);
      } else {
        if (!docOrds.isEmpty()) {
          assertThat((long) singleOrds.getOrd(docId), equalTo(docOrds.get(0)));
          docs.setDocument(docId);
          final int numOrds = docs.cardinality();
          assertThat(numOrds, equalTo(docOrds.size()));
          for (int i = 0; i < numOrds; i++) {
            assertThat(docs.nextOrd(), equalTo(docOrds.get(i)));
          }
          final long[] array = new long[docOrds.size()];
          for (int i = 0; i < array.length; i++) {
            array[i] = docOrds.get(i);
          }
          assertIter(docs, docId, array);
        }
        for (int i = docId + 1; i < ordAndId.id; i++) {
          assertThat((long) singleOrds.getOrd(i), equalTo(RandomAccessOrds.NO_MORE_ORDS));
        }
        docId = ordAndId.id;
        docOrds.clear();
        docOrds.add(ordAndId.ord);
      }
    }
  }
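
The assertion on singleOrds relies on MultiValueMode.MIN.select collapsing each document's set of ordinals to its smallest one, and reporting a missing marker for documents with no values (the test compares against RandomAccessOrds.NO_MORE_ORDS; -1 is used as a stand-in below). Here is a standalone sketch of that selection rule, with a plain List as a hypothetical stand-in for the RandomAccessOrds view:

  import java.util.Arrays;
  import java.util.List;

  public class MinOrdinalSketch {

    // Collapse a document's ordinals to the smallest one, or -1 if it has none.
    static long minOrd(List<Long> ordsForDoc) {
      if (ordsForDoc == null || ordsForDoc.isEmpty()) {
        return -1L;                     // no value for this document
      }
      long min = ordsForDoc.get(0);
      for (long ord : ordsForDoc) {
        min = Math.min(min, ord);
      }
      return min;
    }

    public static void main(String[] args) {
      System.out.println(minOrd(Arrays.asList(3L, 7L, 5L)));  // 3
      System.out.println(minOrd(Arrays.<Long>asList()));      // -1 (missing)
    }
  }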