Search in sources :

Example 96 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class TestDocTermOrds method testSimple.

public void testSimple() throws Exception {
    Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);
    field.setStringValue("a b c");
    w.addDocument(doc);
    field.setStringValue("d e f");
    w.addDocument(doc);
    field.setStringValue("a f");
    w.addDocument(doc);
    final IndexReader r = w.getReader();
    w.close();
    final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(ar);
    final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
    SortedSetDocValues iter = dto.iterator(ar);
    assertEquals(0, iter.nextDoc());
    assertEquals(0, iter.nextOrd());
    assertEquals(1, iter.nextOrd());
    assertEquals(2, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
    assertEquals(1, iter.nextDoc());
    assertEquals(3, iter.nextOrd());
    assertEquals(4, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
    assertEquals(2, iter.nextDoc());
    assertEquals(0, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
    r.close();
    dir.close();
}
Also used : StringField(org.apache.lucene.document.StringField) LegacyLongField(org.apache.solr.legacy.LegacyLongField) LegacyIntField(org.apache.solr.legacy.LegacyIntField) Field(org.apache.lucene.document.Field) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 97 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class TestDocTermOrds method testRandomWithPrefix.

public void testRandomWithPrefix() throws Exception {
    Directory dir = newDirectory();
    final Set<String> prefixes = new HashSet<>();
    final int numPrefix = TestUtil.nextInt(random(), 2, 7);
    if (VERBOSE) {
        System.out.println("TEST: use " + numPrefix + " prefixes");
    }
    while (prefixes.size() < numPrefix) {
        prefixes.add(TestUtil.randomRealisticUnicodeString(random()));
    //prefixes.add(_TestUtil.randomSimpleString(random));
    }
    final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]);
    final int NUM_TERMS = atLeast(20);
    final Set<BytesRef> terms = new HashSet<>();
    while (terms.size() < NUM_TERMS) {
        final String s = prefixesArray[random().nextInt(prefixesArray.length)] + TestUtil.randomRealisticUnicodeString(random());
        //final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random);
        if (s.length() > 0) {
            terms.add(new BytesRef(s));
        }
    }
    final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
    Arrays.sort(termsArray);
    final int NUM_DOCS = atLeast(100);
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    // Sometimes swap in codec that impls ord():
    if (random().nextInt(10) == 7) {
        Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
        conf.setCodec(codec);
    }
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
    final int[][] idToOrds = new int[NUM_DOCS][];
    final Set<Integer> ordsForDocSet = new HashSet<>();
    for (int id = 0; id < NUM_DOCS; id++) {
        Document doc = new Document();
        doc.add(new LegacyIntField("id", id, Field.Store.YES));
        final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
        while (ordsForDocSet.size() < termCount) {
            ordsForDocSet.add(random().nextInt(termsArray.length));
        }
        final int[] ordsForDoc = new int[termCount];
        int upto = 0;
        if (VERBOSE) {
            System.out.println("TEST: doc id=" + id);
        }
        for (int ord : ordsForDocSet) {
            ordsForDoc[upto++] = ord;
            Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
            if (VERBOSE) {
                System.out.println("  f=" + termsArray[ord].utf8ToString());
            }
            doc.add(field);
        }
        ordsForDocSet.clear();
        Arrays.sort(ordsForDoc);
        idToOrds[id] = ordsForDoc;
        w.addDocument(doc);
    }
    final DirectoryReader r = w.getReader();
    w.close();
    if (VERBOSE) {
        System.out.println("TEST: reader=" + r);
    }
    LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(slowR);
    for (String prefix : prefixesArray) {
        final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);
        final int[][] idToOrdsPrefix = new int[NUM_DOCS][];
        for (int id = 0; id < NUM_DOCS; id++) {
            final int[] docOrds = idToOrds[id];
            final List<Integer> newOrds = new ArrayList<>();
            for (int ord : idToOrds[id]) {
                if (StringHelper.startsWith(termsArray[ord], prefixRef)) {
                    newOrds.add(ord);
                }
            }
            final int[] newOrdsArray = new int[newOrds.size()];
            int upto = 0;
            for (int ord : newOrds) {
                newOrdsArray[upto++] = ord;
            }
            idToOrdsPrefix[id] = newOrdsArray;
        }
        for (LeafReaderContext ctx : r.leaves()) {
            if (VERBOSE) {
                System.out.println("\nTEST: sub=" + ctx.reader());
            }
            verify(ctx.reader(), idToOrdsPrefix, termsArray, prefixRef);
        }
        // ord, so this forces the OrdWrapper to run:
        if (VERBOSE) {
            System.out.println("TEST: top reader");
        }
        verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
    }
    FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheHelper().getKey());
    r.close();
    dir.close();
}
Also used : ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) StringField(org.apache.lucene.document.StringField) LegacyLongField(org.apache.solr.legacy.LegacyLongField) LegacyIntField(org.apache.solr.legacy.LegacyIntField) Field(org.apache.lucene.document.Field) Codec(org.apache.lucene.codecs.Codec) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) LegacyIntField(org.apache.solr.legacy.LegacyIntField) LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 98 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class TestDocTermOrds method testActuallySingleValued.

public void testActuallySingleValued() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwconfig = newIndexWriterConfig(null);
    iwconfig.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwconfig);
    Document doc = new Document();
    doc.add(new StringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);
    doc = new Document();
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();
    DirectoryReader ir = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(ir);
    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
    // actually a single-valued field
    assertNotNull(DocValues.unwrapSingleton(v));
    assertEquals(2, v.getValueCount());
    assertEquals(0, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    assertEquals(1, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    assertEquals(3, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
    BytesRef value = v.lookupOrd(0);
    assertEquals("bar", value.utf8ToString());
    value = v.lookupOrd(1);
    assertEquals("baz", value.utf8ToString());
    ir.close();
    dir.close();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) StringField(org.apache.lucene.document.StringField) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 99 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class TestFieldCache method testNonexistantFields.

public void testNonexistantFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);
    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);
    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, floats.nextDoc());
    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, doubles.nextDoc());
    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
    assertEquals(NO_MORE_DOCS, binaries.nextDoc());
    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(NO_MORE_DOCS, sorted.nextDoc());
    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
    assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());
    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
    assertFalse(bits.get(0));
    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) Directory(org.apache.lucene.store.Directory)

Example 100 with LeafReader

use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.

the class TestFieldCache method testNonIndexedFields.

public void testNonIndexedFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new StoredField("bogusbytes", "bogus"));
    doc.add(new StoredField("bogusshorts", "bogus"));
    doc.add(new StoredField("bogusints", "bogus"));
    doc.add(new StoredField("boguslongs", "bogus"));
    doc.add(new StoredField("bogusfloats", "bogus"));
    doc.add(new StoredField("bogusdoubles", "bogus"));
    doc.add(new StoredField("bogusterms", "bogus"));
    doc.add(new StoredField("bogustermsindex", "bogus"));
    doc.add(new StoredField("bogusmultivalued", "bogus"));
    doc.add(new StoredField("bogusbits", "bogus"));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);
    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);
    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, floats.nextDoc());
    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
    assertEquals(NO_MORE_DOCS, doubles.nextDoc());
    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
    assertEquals(NO_MORE_DOCS, binaries.nextDoc());
    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(NO_MORE_DOCS, sorted.nextDoc());
    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
    assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());
    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
    assertFalse(bits.get(0));
    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) StoredField(org.apache.lucene.document.StoredField) LeafReader(org.apache.lucene.index.LeafReader) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) DirectoryReader(org.apache.lucene.index.DirectoryReader) Bits(org.apache.lucene.util.Bits) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) Directory(org.apache.lucene.store.Directory)

Aggregations

LeafReader (org.apache.lucene.index.LeafReader)168 BytesRef (org.apache.lucene.util.BytesRef)65 Document (org.apache.lucene.document.Document)61 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)58 Directory (org.apache.lucene.store.Directory)56 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)55 DirectoryReader (org.apache.lucene.index.DirectoryReader)47 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)42 Test (org.junit.Test)36 IndexWriter (org.apache.lucene.index.IndexWriter)32 Terms (org.apache.lucene.index.Terms)30 TermsEnum (org.apache.lucene.index.TermsEnum)28 NumericDocValues (org.apache.lucene.index.NumericDocValues)24 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)24 SortedDocValues (org.apache.lucene.index.SortedDocValues)22 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)21 IndexReader (org.apache.lucene.index.IndexReader)20 Term (org.apache.lucene.index.Term)20 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)18 Bits (org.apache.lucene.util.Bits)18