Search in sources :

Example 26 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class LegacyDocValuesIterables method sortedSetOrdCountIterable.

/** Converts number-of-ords per document from {@link SortedSetDocValues} into {@code Iterable<Number>}.
   *
   * @deprecated Consume {@link SortedSetDocValues} instead. */
@Deprecated
public static Iterable<Number> sortedSetOrdCountIterable(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo, final int maxDoc) {
    return new Iterable<Number>() {

        @Override
        public Iterator<Number> iterator() {
            final SortedSetDocValues values;
            try {
                values = valuesProducer.getSortedSet(fieldInfo);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
            return new Iterator<Number>() {

                private int nextDocID;

                private int ordCount;

                @Override
                public boolean hasNext() {
                    return nextDocID < maxDoc;
                }

                @Override
                public Number next() {
                    try {
                        if (nextDocID > values.docID()) {
                            if (values.nextDoc() != NO_MORE_DOCS) {
                                ordCount = 0;
                                while (values.nextOrd() != NO_MORE_ORDS) {
                                    ordCount++;
                                }
                            }
                        }
                        int result;
                        if (nextDocID == values.docID()) {
                            result = ordCount;
                        } else {
                            result = 0;
                        }
                        nextDocID++;
                        return result;
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                }
            };
        }
    };
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Iterator(java.util.Iterator) IOException(java.io.IOException)

Example 27 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class LegacyDocValuesIterables method sortedSetOrdsIterable.

/** Converts all concatenated ords (in docID order) from {@link SortedSetDocValues} into {@code Iterable&lt;Number&gt;}.
   *
   * @deprecated Consume {@link SortedSetDocValues} instead. */
@Deprecated
public static Iterable<Number> sortedSetOrdsIterable(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo) {
    return new Iterable<Number>() {

        @Override
        public Iterator<Number> iterator() {
            final SortedSetDocValues values;
            try {
                values = valuesProducer.getSortedSet(fieldInfo);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
            return new Iterator<Number>() {

                private boolean nextIsSet;

                private long nextOrd;

                private void setNext() {
                    try {
                        if (nextIsSet == false) {
                            if (values.docID() == -1) {
                                values.nextDoc();
                            }
                            while (true) {
                                if (values.docID() == NO_MORE_DOCS) {
                                    nextOrd = -1;
                                    break;
                                }
                                nextOrd = values.nextOrd();
                                if (nextOrd != -1) {
                                    break;
                                }
                                values.nextDoc();
                            }
                            nextIsSet = true;
                        }
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                }

                @Override
                public boolean hasNext() {
                    setNext();
                    return nextOrd != -1;
                }

                @Override
                public Number next() {
                    setNext();
                    assert nextOrd != -1;
                    nextIsSet = false;
                    return nextOrd;
                }
            };
        }
    };
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Iterator(java.util.Iterator) IOException(java.io.IOException)

Example 28 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestFaceting method testMultiThreadedFacets.

@Test
public void testMultiThreadedFacets() throws Exception {
    add50ocs();
    String[] methodParam = random().nextBoolean() ? new String[] {} : new String[] { "facet.method", "uif" };
    assertQ("check no threading, threads == 0", req(methodParam, "q", "id:*", "indent", "true", "fl", "id", "rows", "1", "facet", "true", "facet.field", "f0_ws", "facet.field", "f1_ws", "facet.field", "f2_ws", "facet.field", "f3_ws", "facet.field", "f4_ws", "facet.field", "f5_ws", "facet.field", "f6_ws", "facet.field", "f7_ws", "facet.field", "f8_ws", "facet.field", "f9_ws", "facet.threads", "0", "facet.limit", "-1"), "*[count(//lst[@name='facet_fields']/lst)=10]", "*[count(//lst[@name='facet_fields']/lst/int)=20]", "//lst[@name='f0_ws']/int[@name='zero_1'][.='25']", "//lst[@name='f0_ws']/int[@name='zero_2'][.='25']", "//lst[@name='f1_ws']/int[@name='one_1'][.='33']", "//lst[@name='f1_ws']/int[@name='one_3'][.='17']", "//lst[@name='f2_ws']/int[@name='two_1'][.='37']", "//lst[@name='f2_ws']/int[@name='two_4'][.='13']", "//lst[@name='f3_ws']/int[@name='three_1'][.='40']", "//lst[@name='f3_ws']/int[@name='three_5'][.='10']", "//lst[@name='f4_ws']/int[@name='four_1'][.='41']", "//lst[@name='f4_ws']/int[@name='four_6'][.='9']", "//lst[@name='f5_ws']/int[@name='five_1'][.='42']", "//lst[@name='f5_ws']/int[@name='five_7'][.='8']", "//lst[@name='f6_ws']/int[@name='six_1'][.='43']", "//lst[@name='f6_ws']/int[@name='six_8'][.='7']", "//lst[@name='f7_ws']/int[@name='seven_1'][.='44']", "//lst[@name='f7_ws']/int[@name='seven_9'][.='6']", "//lst[@name='f8_ws']/int[@name='eight_1'][.='45']", "//lst[@name='f8_ws']/int[@name='eight_10'][.='5']", "//lst[@name='f9_ws']/int[@name='nine_1'][.='45']", "//lst[@name='f9_ws']/int[@name='nine_11'][.='5']");
    RefCounted<SolrIndexSearcher> currentSearcherRef = h.getCore().getSearcher();
    try {
        SolrIndexSearcher currentSearcher = currentSearcherRef.get();
        SortedSetDocValues ui0 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f0_ws");
        SortedSetDocValues ui1 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f1_ws");
        SortedSetDocValues ui2 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f2_ws");
        SortedSetDocValues ui3 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f3_ws");
        SortedSetDocValues ui4 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f4_ws");
        SortedSetDocValues ui5 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f5_ws");
        SortedSetDocValues ui6 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f6_ws");
        SortedSetDocValues ui7 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f7_ws");
        SortedSetDocValues ui8 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f8_ws");
        SortedSetDocValues ui9 = DocValues.getSortedSet(currentSearcher.getSlowAtomicReader(), "f9_ws");
        assertQ("check threading, more threads than fields", req(methodParam, "q", "id:*", "indent", "true", "fl", "id", "rows", "1", "facet", "true", "facet.field", "f0_ws", "facet.field", "f1_ws", "facet.field", "f2_ws", "facet.field", "f3_ws", "facet.field", "f4_ws", "facet.field", "f5_ws", "facet.field", "f6_ws", "facet.field", "f7_ws", "facet.field", "f8_ws", "facet.field", "f9_ws", "facet.threads", "1000", "facet.limit", "-1"), "*[count(//lst[@name='facet_fields']/lst)=10]", "*[count(//lst[@name='facet_fields']/lst/int)=20]", "//lst[@name='f0_ws']/int[@name='zero_1'][.='25']", "//lst[@name='f0_ws']/int[@name='zero_2'][.='25']", "//lst[@name='f1_ws']/int[@name='one_1'][.='33']", "//lst[@name='f1_ws']/int[@name='one_3'][.='17']", "//lst[@name='f2_ws']/int[@name='two_1'][.='37']", "//lst[@name='f2_ws']/int[@name='two_4'][.='13']", "//lst[@name='f3_ws']/int[@name='three_1'][.='40']", "//lst[@name='f3_ws']/int[@name='three_5'][.='10']", "//lst[@name='f4_ws']/int[@name='four_1'][.='41']", "//lst[@name='f4_ws']/int[@name='four_6'][.='9']", "//lst[@name='f5_ws']/int[@name='five_1'][.='42']", "//lst[@name='f5_ws']/int[@name='five_7'][.='8']", "//lst[@name='f6_ws']/int[@name='six_1'][.='43']", "//lst[@name='f6_ws']/int[@name='six_8'][.='7']", "//lst[@name='f7_ws']/int[@name='seven_1'][.='44']", "//lst[@name='f7_ws']/int[@name='seven_9'][.='6']", "//lst[@name='f8_ws']/int[@name='eight_1'][.='45']", "//lst[@name='f8_ws']/int[@name='eight_10'][.='5']", "//lst[@name='f9_ws']/int[@name='nine_1'][.='45']", "//lst[@name='f9_ws']/int[@name='nine_11'][.='5']");
        assertQ("check threading, fewer threads than fields", req(methodParam, "q", "id:*", "indent", "true", "fl", "id", "rows", "1", "facet", "true", "facet.field", "f0_ws", "facet.field", "f1_ws", "facet.field", "f2_ws", "facet.field", "f3_ws", "facet.field", "f4_ws", "facet.field", "f5_ws", "facet.field", "f6_ws", "facet.field", "f7_ws", "facet.field", "f8_ws", "facet.field", "f9_ws", "facet.threads", "3", "facet.limit", "-1"), "*[count(//lst[@name='facet_fields']/lst)=10]", "*[count(//lst[@name='facet_fields']/lst/int)=20]", "//lst[@name='f0_ws']/int[@name='zero_1'][.='25']", "//lst[@name='f0_ws']/int[@name='zero_2'][.='25']", "//lst[@name='f1_ws']/int[@name='one_1'][.='33']", "//lst[@name='f1_ws']/int[@name='one_3'][.='17']", "//lst[@name='f2_ws']/int[@name='two_1'][.='37']", "//lst[@name='f2_ws']/int[@name='two_4'][.='13']", "//lst[@name='f3_ws']/int[@name='three_1'][.='40']", "//lst[@name='f3_ws']/int[@name='three_5'][.='10']", "//lst[@name='f4_ws']/int[@name='four_1'][.='41']", "//lst[@name='f4_ws']/int[@name='four_6'][.='9']", "//lst[@name='f5_ws']/int[@name='five_1'][.='42']", "//lst[@name='f5_ws']/int[@name='five_7'][.='8']", "//lst[@name='f6_ws']/int[@name='six_1'][.='43']", "//lst[@name='f6_ws']/int[@name='six_8'][.='7']", "//lst[@name='f7_ws']/int[@name='seven_1'][.='44']", "//lst[@name='f7_ws']/int[@name='seven_9'][.='6']", "//lst[@name='f8_ws']/int[@name='eight_1'][.='45']", "//lst[@name='f8_ws']/int[@name='eight_10'][.='5']", "//lst[@name='f9_ws']/int[@name='nine_1'][.='45']", "//lst[@name='f9_ws']/int[@name='nine_11'][.='5']");
        // After this all, the uninverted fields should be exactly the same as they were the first time, even if we
        // blast a whole bunch of identical fields at the facet code.
        // The way fetching the uninverted field is written, all this is really testing is if the cache is working.
        // It's NOT testing whether the pending/sleep is actually functioning, I had to do that by hand since I don't
        // see how to make sure that uninverting the field multiple times actually happens to hit the wait state.
        assertQ("check threading, more threads than fields", req(methodParam, "q", "id:*", "indent", "true", "fl", "id", "rows", "1", "facet", "true", "facet.field", "f0_ws", "facet.field", "f0_ws", "facet.field", "f0_ws", "facet.field", "f0_ws", "facet.field", "f0_ws", "facet.field", "f1_ws", "facet.field", "f1_ws", "facet.field", "f1_ws", "facet.field", "f1_ws", "facet.field", "f1_ws", "facet.field", "f2_ws", "facet.field", "f2_ws", "facet.field", "f2_ws", "facet.field", "f2_ws", "facet.field", "f2_ws", "facet.field", "f3_ws", "facet.field", "f3_ws", "facet.field", "f3_ws", "facet.field", "f3_ws", "facet.field", "f3_ws", "facet.field", "f4_ws", "facet.field", "f4_ws", "facet.field", "f4_ws", "facet.field", "f4_ws", "facet.field", "f4_ws", "facet.field", "f5_ws", "facet.field", "f5_ws", "facet.field", "f5_ws", "facet.field", "f5_ws", "facet.field", "f5_ws", "facet.field", "f6_ws", "facet.field", "f6_ws", "facet.field", "f6_ws", "facet.field", "f6_ws", "facet.field", "f6_ws", "facet.field", "f7_ws", "facet.field", "f7_ws", "facet.field", "f7_ws", "facet.field", "f7_ws", "facet.field", "f7_ws", "facet.field", "f8_ws", "facet.field", "f8_ws", "facet.field", "f8_ws", "facet.field", "f8_ws", "facet.field", "f8_ws", "facet.field", "f9_ws", "facet.field", "f9_ws", "facet.field", "f9_ws", "facet.field", "f9_ws", "facet.field", "f9_ws", "facet.threads", "1000", "facet.limit", "-1"), "*[count(//lst[@name='facet_fields']/lst)=10]", "*[count(//lst[@name='facet_fields']/lst/int)=20]");
    } finally {
        currentSearcherRef.decref();
    }
}
Also used : SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) Test(org.junit.Test)

Example 29 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestDocTermOrds method testSortedTermsEnum.

public void testSortedTermsEnum() throws IOException {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
    iwconfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
    Document doc = new Document();
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("field", "world", Field.Store.NO));
    // we need a second value for a doc, or we don't actually test DocTermOrds!
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("field", "beer", Field.Store.NO));
    iwriter.addDocument(doc);
    iwriter.forceMerge(1);
    DirectoryReader ireader = iwriter.getReader();
    iwriter.close();
    LeafReader ar = getOnlyLeafReader(ireader);
    SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
    assertEquals(3, dv.getValueCount());
    TermsEnum termsEnum = dv.termsEnum();
    // next()
    assertEquals("beer", termsEnum.next().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals("hello", termsEnum.next().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals("world", termsEnum.next().utf8ToString());
    assertEquals(2, termsEnum.ord());
    // seekCeil()
    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
    // seekExact()
    assertTrue(termsEnum.seekExact(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("hello")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("world")));
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    assertFalse(termsEnum.seekExact(new BytesRef("bogus")));
    // seek(ord)
    termsEnum.seekExact(0);
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    termsEnum.seekExact(1);
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    termsEnum.seekExact(2);
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    // lookupTerm(BytesRef) 
    assertEquals(-1, dv.lookupTerm(new BytesRef("apple")));
    assertEquals(0, dv.lookupTerm(new BytesRef("beer")));
    assertEquals(-2, dv.lookupTerm(new BytesRef("car")));
    assertEquals(1, dv.lookupTerm(new BytesRef("hello")));
    assertEquals(-3, dv.lookupTerm(new BytesRef("matter")));
    assertEquals(2, dv.lookupTerm(new BytesRef("world")));
    assertEquals(-4, dv.lookupTerm(new BytesRef("zany")));
    ireader.close();
    directory.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) StringField(org.apache.lucene.document.StringField) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 30 with SortedSetDocValues

use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.

the class TestDocTermOrds method testSimple.

public void testSimple() throws Exception {
    Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);
    field.setStringValue("a b c");
    w.addDocument(doc);
    field.setStringValue("d e f");
    w.addDocument(doc);
    field.setStringValue("a f");
    w.addDocument(doc);
    final IndexReader r = w.getReader();
    w.close();
    final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(ar);
    final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
    SortedSetDocValues iter = dto.iterator(ar);
    assertEquals(0, iter.nextDoc());
    assertEquals(0, iter.nextOrd());
    assertEquals(1, iter.nextOrd());
    assertEquals(2, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
    assertEquals(1, iter.nextDoc());
    assertEquals(3, iter.nextOrd());
    assertEquals(4, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
    assertEquals(2, iter.nextDoc());
    assertEquals(0, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
    r.close();
    dir.close();
}
Also used : StringField(org.apache.lucene.document.StringField) LegacyLongField(org.apache.solr.legacy.LegacyLongField) LegacyIntField(org.apache.solr.legacy.LegacyIntField) Field(org.apache.lucene.document.Field) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)53 BytesRef (org.apache.lucene.util.BytesRef)33 Document (org.apache.lucene.document.Document)25 LeafReader (org.apache.lucene.index.LeafReader)24 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)22 SortedDocValues (org.apache.lucene.index.SortedDocValues)22 Directory (org.apache.lucene.store.Directory)22 DirectoryReader (org.apache.lucene.index.DirectoryReader)19 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)17 IndexWriter (org.apache.lucene.index.IndexWriter)12 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)10 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)9 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)9 NumericDocValues (org.apache.lucene.index.NumericDocValues)9 TermsEnum (org.apache.lucene.index.TermsEnum)9 IOException (java.io.IOException)8 ArrayList (java.util.ArrayList)8 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)8 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)7