use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
the class LegacyDocValuesIterables method sortedSetOrdCountIterable.
/** Adapts a {@link SortedSetDocValues} field to an {@code Iterable<Number>} holding one ord count per document.
 *
 * <p>Every call to {@code iterator()} pulls a fresh {@link SortedSetDocValues} from the producer. The
 * iterator reports {@code hasNext()} for docIDs {@code 0 .. maxDoc-1}; a document that the doc values
 * do not visit contributes {@code 0}. Any {@link IOException} raised while reading is rethrown wrapped
 * in a {@link RuntimeException}.
 *
 * @param valuesProducer producer the sorted-set doc values are fetched from
 * @param fieldInfo field whose values are counted
 * @param maxDoc number of documents to report a count for
 * @return the per-document ord counts, in docID order
 * @deprecated Consume {@link SortedSetDocValues} instead. */
@Deprecated
public static Iterable<Number> sortedSetOrdCountIterable(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo, final int maxDoc) {
  return new Iterable<Number>() {
    @Override
    public Iterator<Number> iterator() {
      final SortedSetDocValues values;
      try {
        values = valuesProducer.getSortedSet(fieldInfo);
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }

      return new Iterator<Number>() {
        // docID whose count the next call to next() will report
        private int upto;
        // ord count of the document the underlying values were last advanced to
        private int currentCount;

        @Override
        public boolean hasNext() {
          return maxDoc > upto;
        }

        @Override
        public Number next() {
          try {
            // Step the underlying values only once we have moved past the document they sit on.
            final boolean advanced = upto > values.docID() && values.nextDoc() != NO_MORE_DOCS;
            if (advanced) {
              for (currentCount = 0; values.nextOrd() != NO_MORE_ORDS; currentCount++) {
                // counting only; the ord values themselves are not needed
              }
            }
            // Documents skipped by the doc values have no ords.
            final int count = (values.docID() == upto) ? currentCount : 0;
            upto++;
            return count;
          } catch (IOException ioe) {
            throw new RuntimeException(ioe);
          }
        }
      };
    }
  };
}
use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
the class LegacyDocValuesIterables method sortedSetOrdsIterable.
/** Adapts a {@link SortedSetDocValues} field to an {@code Iterable<Number>} that yields every ord of
 * every document, concatenated in docID order.
 *
 * <p>Every call to {@code iterator()} pulls a fresh {@link SortedSetDocValues} from the producer. Any
 * {@link IOException} raised while reading is rethrown wrapped in a {@link RuntimeException}.
 *
 * @param valuesProducer producer the sorted-set doc values are fetched from
 * @param fieldInfo field whose ords are enumerated
 * @return all ords of the field, document by document
 * @deprecated Consume {@link SortedSetDocValues} instead. */
@Deprecated
public static Iterable<Number> sortedSetOrdsIterable(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo) {
  return new Iterable<Number>() {
    @Override
    public Iterator<Number> iterator() {
      final SortedSetDocValues values;
      try {
        values = valuesProducer.getSortedSet(fieldInfo);
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }

      return new Iterator<Number>() {
        // true while bufferedOrd holds a looked-ahead value that next() has not handed out yet
        private boolean buffered;
        // the looked-ahead ord, or -1 once the values are exhausted
        private long bufferedOrd;

        /** Looks ahead to the following ord (or -1 at the end) unless one is already buffered. */
        private void fill() {
          if (buffered) {
            return;
          }
          try {
            // docID() == -1 means the values have not been positioned on any document yet.
            if (values.docID() == -1) {
              values.nextDoc();
            }
            while (values.docID() != NO_MORE_DOCS) {
              bufferedOrd = values.nextOrd();
              if (bufferedOrd != -1) {
                buffered = true;
                return;
              }
              // Current document has no more ords; move on to the next one.
              values.nextDoc();
            }
            bufferedOrd = -1;
            buffered = true;
          } catch (IOException ioe) {
            throw new RuntimeException(ioe);
          }
        }

        @Override
        public boolean hasNext() {
          fill();
          return bufferedOrd != -1;
        }

        @Override
        public Number next() {
          fill();
          assert bufferedOrd != -1;
          buffered = false;
          return bufferedOrd;
        }
      };
    }
  };
}
use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
the class TestFaceting method testMultiThreadedFacets.
/**
 * Issues the same ten-field facet request (f0_ws .. f9_ws) with {@code facet.threads} set to 0, 1000
 * and 3 and asserts the same facet counts each time. It then sends a request that names each of the
 * ten fields five times (1000 threads) and asserts the response still contains 10 facet lists and 20
 * counts in total.
 */
@Test
public void testMultiThreadedFacets() throws Exception {
// Helper defined elsewhere; presumably indexes the documents that produce the counts asserted below — verify.
add50ocs();
// Randomly run either with no extra parameters or with facet.method=uif.
String[] methodParam = random().nextBoolean() ? new String[] {} : new String[] { "facet.method", "uif" };
// Baseline: facet.threads=0.
assertQ("check no threading, threads == 0", req(methodParam, "q", "id:*", "indent", "true", "fl", "id", "rows", "1", "facet", "true", "facet.field", "f0_ws", "facet.field", "f1_ws", "facet.field", "f2_ws", "facet.field", "f3_ws", "facet.field", "f4_ws", "facet.field", "f5_ws", "facet.field", "f6_ws", "facet.field", "f7_ws", "facet.field", "f8_ws", "facet.field", "f9_ws", "facet.threads", "0", "facet.limit", "-1"), "*[count(//lst[@name='facet_fields']/lst)=10]", "*[count(//lst[@name='facet_fields']/lst/int)=20]", "//lst[@name='f0_ws']/int[@name='zero_1'][.='25']", "//lst[@name='f0_ws']/int[@name='zero_2'][.='25']", "//lst[@name='f1_ws']/int[@name='one_1'][.='33']", "//lst[@name='f1_ws']/int[@name='one_3'][.='17']", "//lst[@name='f2_ws']/int[@name='two_1'][.='37']", "//lst[@name='f2_ws']/int[@name='two_4'][.='13']", "//lst[@name='f3_ws']/int[@name='three_1'][.='40']", "//lst[@name='f3_ws']/int[@name='three_5'][.='10']", "//lst[@name='f4_ws']/int[@name='four_1'][.='41']", "//lst[@name='f4_ws']/int[@name='four_6'][.='9']", "//lst[@name='f5_ws']/int[@name='five_1'][.='42']", "//lst[@name='f5_ws']/int[@name='five_7'][.='8']", "//lst[@name='f6_ws']/int[@name='six_1'][.='43']", "//lst[@name='f6_ws']/int[@name='six_8'][.='7']", "//lst[@name='f7_ws']/int[@name='seven_1'][.='44']", "//lst[@name='f7_ws']/int[@name='seven_9'][.='6']", "//lst[@name='f8_ws']/int[@name='eight_1'][.='45']", "//lst[@name='f8_ws']/int[@name='eight_10'][.='5']", "//lst[@name='f9_ws']/int[@name='nine_1'][.='45']", "//lst[@name='f9_ws']/int[@name='nine_11'][.='5']");
// Take a reference to the current searcher; it is released in the finally block via decref().
RefCounted<SolrIndexSearcher> currentSearcherRef = h.getCore().getSearcher();
try {
SolrIndexSearcher currentSearcher = currentSearcherRef.get();
// Fetch the sorted-set doc values of each faceted field from the slow atomic reader.
// NOTE(review): ui0..ui9 are never read again in this method — confirm whether the calls are still
// needed (e.g. for a side effect) before removing them.
SortedSetDocValues ui0 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f0_ws");
SortedSetDocValues ui1 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f1_ws");
SortedSetDocValues ui2 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f2_ws");
SortedSetDocValues ui3 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f3_ws");
SortedSetDocValues ui4 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f4_ws");
SortedSetDocValues ui5 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f5_ws");
SortedSetDocValues ui6 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f6_ws");
SortedSetDocValues ui7 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f7_ws");
SortedSetDocValues ui8 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f8_ws");
SortedSetDocValues ui9 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f9_ws");
// Same request and expectations as the baseline, but with facet.threads=1000.
assertQ("check threading, more threads than fields", req(methodParam, "q", "id:*", "indent", "true", "fl", "id", "rows", "1", "facet", "true", "facet.field", "f0_ws", "facet.field", "f1_ws", "facet.field", "f2_ws", "facet.field", "f3_ws", "facet.field", "f4_ws", "facet.field", "f5_ws", "facet.field", "f6_ws", "facet.field", "f7_ws", "facet.field", "f8_ws", "facet.field", "f9_ws", "facet.threads", "1000", "facet.limit", "-1"), "*[count(//lst[@name='facet_fields']/lst)=10]", "*[count(//lst[@name='facet_fields']/lst/int)=20]", "//lst[@name='f0_ws']/int[@name='zero_1'][.='25']", "//lst[@name='f0_ws']/int[@name='zero_2'][.='25']", "//lst[@name='f1_ws']/int[@name='one_1'][.='33']", "//lst[@name='f1_ws']/int[@name='one_3'][.='17']", "//lst[@name='f2_ws']/int[@name='two_1'][.='37']", "//lst[@name='f2_ws']/int[@name='two_4'][.='13']", "//lst[@name='f3_ws']/int[@name='three_1'][.='40']", "//lst[@name='f3_ws']/int[@name='three_5'][.='10']", "//lst[@name='f4_ws']/int[@name='four_1'][.='41']", "//lst[@name='f4_ws']/int[@name='four_6'][.='9']", "//lst[@name='f5_ws']/int[@name='five_1'][.='42']", "//lst[@name='f5_ws']/int[@name='five_7'][.='8']", "//lst[@name='f6_ws']/int[@name='six_1'][.='43']", "//lst[@name='f6_ws']/int[@name='six_8'][.='7']", "//lst[@name='f7_ws']/int[@name='seven_1'][.='44']", "//lst[@name='f7_ws']/int[@name='seven_9'][.='6']", "//lst[@name='f8_ws']/int[@name='eight_1'][.='45']", "//lst[@name='f8_ws']/int[@name='eight_10'][.='5']", "//lst[@name='f9_ws']/int[@name='nine_1'][.='45']", "//lst[@name='f9_ws']/int[@name='nine_11'][.='5']");
// Same request and expectations again, with facet.threads=3 (fewer threads than the ten fields).
assertQ("check threading, fewer threads than fields", req(methodParam, "q", "id:*", "indent", "true", "fl", "id", "rows", "1", "facet", "true", "facet.field", "f0_ws", "facet.field", "f1_ws", "facet.field", "f2_ws", "facet.field", "f3_ws", "facet.field", "f4_ws", "facet.field", "f5_ws", "facet.field", "f6_ws", "facet.field", "f7_ws", "facet.field", "f8_ws", "facet.field", "f9_ws", "facet.threads", "3", "facet.limit", "-1"), "*[count(//lst[@name='facet_fields']/lst)=10]", "*[count(//lst[@name='facet_fields']/lst/int)=20]", "//lst[@name='f0_ws']/int[@name='zero_1'][.='25']", "//lst[@name='f0_ws']/int[@name='zero_2'][.='25']", "//lst[@name='f1_ws']/int[@name='one_1'][.='33']", "//lst[@name='f1_ws']/int[@name='one_3'][.='17']", "//lst[@name='f2_ws']/int[@name='two_1'][.='37']", "//lst[@name='f2_ws']/int[@name='two_4'][.='13']", "//lst[@name='f3_ws']/int[@name='three_1'][.='40']", "//lst[@name='f3_ws']/int[@name='three_5'][.='10']", "//lst[@name='f4_ws']/int[@name='four_1'][.='41']", "//lst[@name='f4_ws']/int[@name='four_6'][.='9']", "//lst[@name='f5_ws']/int[@name='five_1'][.='42']", "//lst[@name='f5_ws']/int[@name='five_7'][.='8']", "//lst[@name='f6_ws']/int[@name='six_1'][.='43']", "//lst[@name='f6_ws']/int[@name='six_8'][.='7']", "//lst[@name='f7_ws']/int[@name='seven_1'][.='44']", "//lst[@name='f7_ws']/int[@name='seven_9'][.='6']", "//lst[@name='f8_ws']/int[@name='eight_1'][.='45']", "//lst[@name='f8_ws']/int[@name='eight_10'][.='5']", "//lst[@name='f9_ws']/int[@name='nine_1'][.='45']", "//lst[@name='f9_ws']/int[@name='nine_11'][.='5']");
// After this all, the uninverted fields should be exactly the same as they were the first time, even if we
// blast a whole bunch of identical fields at the facet code.
// The way fetching the uninverted field is written, all this is really testing is if the cache is working.
// It's NOT testing whether the pending/sleep is actually functioning, I had to do that by hand since I don't
// see how to make sure that uninverting the field multiple times actually happens to hit the wait state.
// Each field is named five times here; only the totals (10 lists, 20 counts) are asserted.
assertQ("check threading, more threads than fields", req(methodParam, "q", "id:*", "indent", "true", "fl", "id", "rows", "1", "facet", "true", "facet.field", "f0_ws", "facet.field", "f0_ws", "facet.field", "f0_ws", "facet.field", "f0_ws", "facet.field", "f0_ws", "facet.field", "f1_ws", "facet.field", "f1_ws", "facet.field", "f1_ws", "facet.field", "f1_ws", "facet.field", "f1_ws", "facet.field", "f2_ws", "facet.field", "f2_ws", "facet.field", "f2_ws", "facet.field", "f2_ws", "facet.field", "f2_ws", "facet.field", "f3_ws", "facet.field", "f3_ws", "facet.field", "f3_ws", "facet.field", "f3_ws", "facet.field", "f3_ws", "facet.field", "f4_ws", "facet.field", "f4_ws", "facet.field", "f4_ws", "facet.field", "f4_ws", "facet.field", "f4_ws", "facet.field", "f5_ws", "facet.field", "f5_ws", "facet.field", "f5_ws", "facet.field", "f5_ws", "facet.field", "f5_ws", "facet.field", "f6_ws", "facet.field", "f6_ws", "facet.field", "f6_ws", "facet.field", "f6_ws", "facet.field", "f6_ws", "facet.field", "f7_ws", "facet.field", "f7_ws", "facet.field", "f7_ws", "facet.field", "f7_ws", "facet.field", "f7_ws", "facet.field", "f8_ws", "facet.field", "f8_ws", "facet.field", "f8_ws", "facet.field", "f8_ws", "facet.field", "f8_ws", "facet.field", "f9_ws", "facet.field", "f9_ws", "facet.field", "f9_ws", "facet.field", "f9_ws", "facet.field", "f9_ws", "facet.threads", "1000", "facet.limit", "-1"), "*[count(//lst[@name='facet_fields']/lst)=10]", "*[count(//lst[@name='facet_fields']/lst/int)=20]");
} finally {
// Always release the searcher reference taken above.
currentSearcherRef.decref();
}
}
use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
the class TestDocTermOrds method testSortedTermsEnum.
/**
 * Indexes three documents with the terms "hello", "world"+"hello" and "beer" in {@code field},
 * force-merges to one segment, and checks the {@link TermsEnum} of the doc-term-ords view:
 * {@code next()}, {@code seekCeil()}, {@code seekExact(BytesRef)}, {@code seekExact(long)}, plus
 * {@code lookupTerm(BytesRef)} on the {@link SortedSetDocValues} itself.
 */
public void testSortedTermsEnum() throws IOException {
  Directory dir = newDirectory();
  Analyzer mockAnalyzer = new MockAnalyzer(random());
  IndexWriterConfig config = newIndexWriterConfig(mockAnalyzer);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

  Document helloDoc = new Document();
  helloDoc.add(new StringField("field", "hello", Field.Store.NO));
  writer.addDocument(helloDoc);

  Document worldHelloDoc = new Document();
  worldHelloDoc.add(new StringField("field", "world", Field.Store.NO));
  // we need a second value for a doc, or we don't actually test DocTermOrds!
  worldHelloDoc.add(new StringField("field", "hello", Field.Store.NO));
  writer.addDocument(worldHelloDoc);

  Document beerDoc = new Document();
  beerDoc.add(new StringField("field", "beer", Field.Store.NO));
  writer.addDocument(beerDoc);

  writer.forceMerge(1);
  DirectoryReader reader = writer.getReader();
  writer.close();

  LeafReader leaf = getOnlyLeafReader(reader);
  SortedSetDocValues ords = FieldCache.DEFAULT.getDocTermOrds(leaf, "field", null);
  assertEquals(3, ords.getValueCount());
  TermsEnum termsEnum = ords.termsEnum();

  // The three distinct terms in sorted order; the array index is the expected ord.
  final String[] sortedTerms = {"beer", "hello", "world"};

  // next()
  for (int ord = 0; ord < sortedTerms.length; ord++) {
    assertEquals(sortedTerms[ord], termsEnum.next().utf8ToString());
    assertEquals(ord, termsEnum.ord());
  }

  // seekCeil()
  assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
  assertEquals("hello", termsEnum.term().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
  assertEquals("beer", termsEnum.term().utf8ToString());
  assertEquals(0, termsEnum.ord());
  assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));

  // seekExact()
  for (int ord = 0; ord < sortedTerms.length; ord++) {
    assertTrue(termsEnum.seekExact(new BytesRef(sortedTerms[ord])));
    assertEquals(sortedTerms[ord], termsEnum.term().utf8ToString());
    assertEquals(ord, termsEnum.ord());
  }
  assertFalse(termsEnum.seekExact(new BytesRef("bogus")));

  // seek(ord)
  for (int ord = 0; ord < sortedTerms.length; ord++) {
    termsEnum.seekExact(ord);
    assertEquals(sortedTerms[ord], termsEnum.term().utf8ToString());
    assertEquals(ord, termsEnum.ord());
  }

  // lookupTerm(BytesRef): a missing term yields -(insertionPoint)-1
  assertEquals(-1, ords.lookupTerm(new BytesRef("apple")));
  assertEquals(0, ords.lookupTerm(new BytesRef("beer")));
  assertEquals(-2, ords.lookupTerm(new BytesRef("car")));
  assertEquals(1, ords.lookupTerm(new BytesRef("hello")));
  assertEquals(-3, ords.lookupTerm(new BytesRef("matter")));
  assertEquals(2, ords.lookupTerm(new BytesRef("world")));
  assertEquals(-4, ords.lookupTerm(new BytesRef("zany")));

  reader.close();
  dir.close();
}
use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
the class TestDocTermOrds method testSimple.
/**
 * Indexes three documents whose {@code field} text is "a b c", "d e f" and "a f", then walks the
 * {@link DocTermOrds} iterator and checks that documents 0, 1 and 2 expose the ords
 * {0,1,2}, {3,4,5} and {0,5} respectively, each list terminated by {@code NO_MORE_ORDS}.
 */
public void testSimple() throws Exception {
  Directory dir = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(
      random(),
      dir,
      newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));

  // One Document/Field pair is reused; only the field's text changes between additions.
  Document doc = new Document();
  Field field = newTextField("field", "", Field.Store.NO);
  doc.add(field);
  for (String text : new String[] {"a b c", "d e f", "a f"}) {
    field.setStringValue(text);
    writer.addDocument(doc);
  }

  final IndexReader reader = writer.getReader();
  writer.close();

  final LeafReader leaf = SlowCompositeReaderWrapper.wrap(reader);
  TestUtil.checkReader(leaf);

  final DocTermOrds dto = new DocTermOrds(leaf, leaf.getLiveDocs(), "field");
  SortedSetDocValues iter = dto.iterator(leaf);

  // expectedOrds[docID] lists the ords that document must return, in order.
  final long[][] expectedOrds = {
      {0, 1, 2},
      {3, 4, 5},
      {0, 5},
  };
  for (int docID = 0; docID < expectedOrds.length; docID++) {
    assertEquals(docID, iter.nextDoc());
    for (long expectedOrd : expectedOrds[docID]) {
      assertEquals(expectedOrd, iter.nextOrd());
    }
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
  }

  reader.close();
  dir.close();
}
Aggregations