Use of org.apache.lucene.index.TermsEnum in project lucene-solr by apache.
From the class DocValuesConsumer, the method mergeSortedSetField:
/**
 * Merges the sorted-set doc values from <code>toMerge</code>.
 * <p>
 * The default implementation calls {@link #addSortedSetField}, passing
 * an Iterable that merges ordinals and values and filters deleted documents.
 */
public void mergeSortedSetField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException {
  List<SortedSetDocValues> toMerge = new ArrayList<>();
  for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
    SortedSetDocValues values = null;
    DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
    if (docValuesProducer != null) {
      FieldInfo fieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
      if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
        values = docValuesProducer.getSortedSet(fieldInfo);
      }
    }
    if (values == null) {
      values = DocValues.emptySortedSet();
    }
    toMerge.add(values);
  }

  // step 1: iterate through each sub and mark terms still in use
  TermsEnum[] liveTerms = new TermsEnum[toMerge.size()];
  long[] weights = new long[liveTerms.length];
  for (int sub = 0; sub < liveTerms.length; sub++) {
    SortedSetDocValues dv = toMerge.get(sub);
    Bits liveDocs = mergeState.liveDocs[sub];
    if (liveDocs == null) {
      liveTerms[sub] = dv.termsEnum();
      weights[sub] = dv.getValueCount();
    } else {
      LongBitSet bitset = new LongBitSet(dv.getValueCount());
      int docID;
      while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
        if (liveDocs.get(docID)) {
          long ord;
          while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            bitset.set(ord);
          }
        }
      }
      liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
      weights[sub] = bitset.cardinality();
    }
  }

  // step 2: create ordinal map (this conceptually does the "merging")
  final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);

  // step 3: add field
  addSortedSetField(mergeFieldInfo, new EmptyDocValuesProducer() {
    @Override
    public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
      if (fieldInfo != mergeFieldInfo) {
        throw new IllegalArgumentException("wrong FieldInfo");
      }
      // We must make new iterators + DocIDMerger for each iterator:
      List<SortedSetDocValuesSub> subs = new ArrayList<>();
      long cost = 0;
      for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
        SortedSetDocValues values = null;
        DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
        if (docValuesProducer != null) {
          FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
          if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
            values = docValuesProducer.getSortedSet(readerFieldInfo);
          }
        }
        if (values == null) {
          values = DocValues.emptySortedSet();
        }
        cost += values.cost();
        subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
      }

      final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
      final long finalCost = cost;

      return new SortedSetDocValues() {
        private int docID = -1;
        private SortedSetDocValuesSub currentSub;

        @Override
        public int docID() {
          return docID;
        }

        @Override
        public int nextDoc() throws IOException {
          currentSub = docIDMerger.next();
          if (currentSub == null) {
            docID = NO_MORE_DOCS;
          } else {
            docID = currentSub.mappedDocID;
          }
          return docID;
        }

        @Override
        public int advance(int target) throws IOException {
          throw new UnsupportedOperationException();
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
          throw new UnsupportedOperationException();
        }

        @Override
        public long nextOrd() throws IOException {
          long subOrd = currentSub.values.nextOrd();
          if (subOrd == NO_MORE_ORDS) {
            return NO_MORE_ORDS;
          }
          return currentSub.map.get(subOrd);
        }

        @Override
        public long cost() {
          return finalCost;
        }

        @Override
        public BytesRef lookupOrd(long ord) throws IOException {
          int segmentNumber = map.getFirstSegmentNumber(ord);
          long segmentOrd = map.getFirstSegmentOrd(ord);
          return toMerge.get(segmentNumber).lookupOrd(segmentOrd);
        }

        @Override
        public long getValueCount() {
          return map.getValueCount();
        }
      };
    }
  });
}
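The work above hinges on the OrdinalMap built in step 2: it stitches each segment's private term dictionary into a single global ordinal space, and steps 1 and 3 merely feed it filtered TermsEnums and remap ordinals through it. Below is a minimal, self-contained sketch of that remapping, assuming a Lucene version where OrdinalMap lives in org.apache.lucene.index; the helper name printGlobalOrds and the idea of dumping every mapping are illustrative, not part of Lucene.

import java.io.IOException;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;

// Hypothetical helper: given one SortedSetDocValues per segment, build the
// same kind of OrdinalMap that mergeSortedSetField uses and walk the mapping.
static void printGlobalOrds(SortedSetDocValues[] segments) throws IOException {
  TermsEnum[] subs = new TermsEnum[segments.length];
  long[] weights = new long[segments.length];
  for (int i = 0; i < segments.length; i++) {
    subs[i] = segments[i].termsEnum();        // per-segment term dictionary
    weights[i] = segments[i].getValueCount(); // weights let the map favor larger segments
  }
  // COMPACT favors the smallest packed representation over lookup speed
  OrdinalMap map = OrdinalMap.build(null, subs, weights, PackedInts.COMPACT);
  for (int i = 0; i < segments.length; i++) {
    LongValues toGlobal = map.getGlobalOrds(i);
    TermsEnum te = segments[i].termsEnum();   // fresh enum; the first one is exhausted
    BytesRef term;
    long segOrd = 0;
    while ((term = te.next()) != null) {
      System.out.println(term.utf8ToString() + ": segment ord " + segOrd
          + " -> global ord " + toGlobal.get(segOrd));
      segOrd++;
    }
  }
}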
Use of org.apache.lucene.index.TermsEnum in project lucene-solr by apache.
From the class TestOrdsBlockTree, the method testBasic:
public void testBasic() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newTextField("field", "a b c", Field.Store.NO));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator();

  // Test next()
  assertEquals(new BytesRef("a"), te.next());
  assertEquals(0L, te.ord());
  assertEquals(new BytesRef("b"), te.next());
  assertEquals(1L, te.ord());
  assertEquals(new BytesRef("c"), te.next());
  assertEquals(2L, te.ord());
  assertNull(te.next());

  // Test seekExact by term
  assertTrue(te.seekExact(new BytesRef("b")));
  assertEquals(1, te.ord());
  assertTrue(te.seekExact(new BytesRef("a")));
  assertEquals(0, te.ord());
  assertTrue(te.seekExact(new BytesRef("c")));
  assertEquals(2, te.ord());

  // Test seekExact by ord
  te.seekExact(1);
  assertEquals(new BytesRef("b"), te.term());
  te.seekExact(0);
  assertEquals(new BytesRef("a"), te.term());
  te.seekExact(2);
  assertEquals(new BytesRef("c"), te.term());

  r.close();
  w.close();
  dir.close();
}
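TermsEnum also supports seekCeil, which positions the enum on the smallest term at or after the target. As an illustrative extension of the test above (these lines are not part of the original test and would have to run before r.close()), seeking to the absent term "aa" on this index should land on "b":

TermsEnum.SeekStatus status = te.seekCeil(new BytesRef("aa"));
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status); // "aa" itself is absent
assertEquals(new BytesRef("b"), te.term());           // positioned on the next term
assertEquals(1L, te.ord());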
Use of org.apache.lucene.index.TermsEnum in project lucene-solr by apache.
From the class TestOrdsBlockTree, the method testThreeBlocks:
public void testThreeBlocks() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  List<String> terms = new ArrayList<>();
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "m" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "mo" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator();
  if (VERBOSE) {
    while (te.next() != null) {
      System.out.println("TERM: " + te.ord() + " " + te.term().utf8ToString());
    }
  }
  assertTrue(te.seekExact(new BytesRef("mo")));
  assertEquals(27, te.ord());
  te.seekExact(90);
  assertEquals(new BytesRef("s"), te.term());
  testEnum(te, terms);
  r.close();
  w.close();
  dir.close();
}
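The testEnum helper called near the end is defined elsewhere in TestOrdsBlockTree and is not shown in this excerpt. A check in the same spirit, illustrative only and not the actual helper, would seek to every ord and confirm that term() and ord() track the sorted term list:

// Illustrative sketch, not the real testEnum: ords are assigned in term
// dictionary order, so sorting the indexed terms recovers the ord layout.
List<String> sorted = new ArrayList<>(terms);
Collections.sort(sorted);
for (int ord = 0; ord < sorted.size(); ord++) {
  te.seekExact(ord);
  assertEquals(sorted.get(ord), te.term().utf8ToString());
  assertEquals(ord, te.ord());
}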
Use of org.apache.lucene.index.TermsEnum in project lucene-solr by apache.
From the class TestOrdsBlockTree, the method testFloorBlocks:
public void testFloorBlocks() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, iwc);
  for (int i = 0; i < 128; i++) {
    Document doc = new Document();
    String term = "" + (char) i;
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term + " bytes=" + new BytesRef(term));
    }
    doc.add(newStringField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader r = DirectoryReader.open(w);
  TermsEnum te = MultiFields.getTerms(r, "field").iterator();
  if (VERBOSE) {
    BytesRef term;
    while ((term = te.next()) != null) {
      System.out.println(" " + te.ord() + ": " + term.utf8ToString());
    }
  }
  assertTrue(te.seekExact(new BytesRef("a")));
  assertEquals(97, te.ord());
  te.seekExact(98);
  assertEquals(new BytesRef("b"), te.term());
  assertTrue(te.seekExact(new BytesRef("z")));
  assertEquals(122, te.ord());
  r.close();
  w.close();
  dir.close();
}
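Since the 128 single-character terms are all ASCII and sort in byte order, a term's ord is exactly its character code, which is why "a" sits at ord 97 above. An illustrative exhaustive check, not part of the original test and assuming it runs before the reader is closed:

// Illustrative: ord equals the ASCII code of each single-character term.
for (int i = 0; i < 128; i++) {
  te.seekExact(i);
  assertEquals(i, te.ord());
  assertEquals(new BytesRef("" + (char) i), te.term());
}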
Use of org.apache.lucene.index.TermsEnum in project lucene-solr by apache.
From the class TestOrdsBlockTree, the method testSeveralNonRootBlocks:
public void testSeveralNonRootBlocks() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, iwc);
  List<String> terms = new ArrayList<>();
  for (int i = 0; i < 30; i++) {
    for (int j = 0; j < 30; j++) {
      Document doc = new Document();
      String term = "" + (char) (97 + i) + (char) (97 + j);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("term=" + term);
      }
      doc.add(newTextField("body", term, Field.Store.NO));
      w.addDocument(doc);
    }
  }
  w.forceMerge(1);
  IndexReader r = DirectoryReader.open(w);
  TermsEnum te = MultiFields.getTerms(r, "body").iterator();
  for (int i = 0; i < 30; i++) {
    for (int j = 0; j < 30; j++) {
      String term = "" + (char) (97 + i) + (char) (97 + j);
      if (VERBOSE) {
        System.out.println("TEST: check term=" + term);
      }
      assertEquals(term, te.next().utf8ToString());
      assertEquals(30 * i + j, te.ord());
    }
  }
  testEnum(te, terms);
  te.seekExact(0);
  assertEquals("aa", te.term().utf8ToString());
  r.close();
  w.close();
  dir.close();
}
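Because the 30 x 30 two-character terms enumerate in strict lexicographic order, the ord layout is dense: ord = 30 * i + j, as the assertion inside the loop verifies. Going the other way is simple arithmetic; an illustrative spot check, not part of the original test, with an arbitrary probe value:

// Illustrative: recover the term from an ord on the dense 30x30 layout.
int ord = 30 * 12 + 5; // arbitrary probe: i=12 ('m'), j=5 ('f')
te.seekExact(ord);
assertEquals("mf", te.term().utf8ToString());
assertEquals((long) ord, te.ord());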