Search in sources:

Example 66 with TermsEnum

use of org.apache.lucene.index.TermsEnum in project lucene-solr by apache.

the class TestOrdsBlockTree method testThreeBlocks.

/**
 * Indexes 36 one-token documents for each of the prefixes "", "m" and "mo"
 * (the token is the prefix followed by the character with code 97 + i), calls
 * {@code forceMerge(1)}, and then checks term/ordinal lookups on the "field"
 * terms enum.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so
 * they are closed (reader, then writer, then directory) even when an assertion
 * fails part-way through.
 *
 * @throws Exception if indexing, reading or closing fails
 */
public void testThreeBlocks() throws Exception {
    try (Directory dir = newDirectory();
            RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        List<String> terms = new ArrayList<>();
        // One pass per prefix, in the same order the documents were always added.
        for (String prefix : new String[] { "", "m", "mo" }) {
            for (int i = 0; i < 36; i++) {
                String term = prefix + (char) (97 + i);
                terms.add(term);
                if (VERBOSE) {
                    System.out.println("i=" + i + " term=" + term);
                }
                Document doc = new Document();
                doc.add(newTextField("field", term, Field.Store.NO));
                w.addDocument(doc);
            }
        }
        w.forceMerge(1);
        try (IndexReader r = w.getReader()) {
            TermsEnum te = MultiFields.getTerms(r, "field").iterator();
            if (VERBOSE) {
                // Debug dump of every term with its ordinal; runs the enum to exhaustion.
                while (te.next() != null) {
                    System.out.println("TERM: " + te.ord() + " " + te.term().utf8ToString());
                }
            }
            // Lookup by term must report the expected ordinal ...
            assertTrue(te.seekExact(new BytesRef("mo")));
            assertEquals(27, te.ord());
            // ... and lookup by ordinal must land on the expected term.
            te.seekExact(90);
            assertEquals(new BytesRef("s"), te.term());
            testEnum(te, terms);
        }
    }
}
Also used : ArrayList(java.util.ArrayList) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 67 with TermsEnum

use of org.apache.lucene.index.TermsEnum in project lucene-solr by apache.

the class TestOrdsBlockTree method testFloorBlocks.

/**
 * Indexes 128 documents whose "field" value is the single character with code
 * 0..127 (as an un-analyzed string field), calls {@code forceMerge(1)}, and
 * checks that term-to-ordinal and ordinal-to-term lookups agree for "a", "b"
 * and "z".
 *
 * <p>The directory, writer and reader are managed with try-with-resources so
 * they are closed (reader, then writer, then directory) even when an assertion
 * fails part-way through.
 *
 * @throws Exception if indexing, reading or closing fails
 */
public void testFloorBlocks() throws Exception {
    try (Directory dir = newDirectory();
            IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
        for (int i = 0; i < 128; i++) {
            String term = "" + (char) i;
            if (VERBOSE) {
                System.out.println("i=" + i + " term=" + term + " bytes=" + new BytesRef(term));
            }
            Document doc = new Document();
            doc.add(newStringField("field", term, Field.Store.NO));
            w.addDocument(doc);
        }
        w.forceMerge(1);
        try (IndexReader r = DirectoryReader.open(w)) {
            TermsEnum te = MultiFields.getTerms(r, "field").iterator();
            if (VERBOSE) {
                // Debug dump of every term with its ordinal; runs the enum to exhaustion.
                BytesRef term;
                while ((term = te.next()) != null) {
                    System.out.println("  " + te.ord() + ": " + term.utf8ToString());
                }
            }
            // The expected ordinals equal the character codes ('a' == 97, 'z' == 122).
            assertTrue(te.seekExact(new BytesRef("a")));
            assertEquals(97, te.ord());
            te.seekExact(98);
            assertEquals(new BytesRef("b"), te.term());
            assertTrue(te.seekExact(new BytesRef("z")));
            assertEquals(122, te.ord());
        }
    }
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 68 with TermsEnum

use of org.apache.lucene.index.TermsEnum in project lucene-solr by apache.

the class TestOrdsBlockTree method testSeveralNonRootBlocks.

/**
 * Indexes 30 x 30 documents whose "body" token is a two-character string built
 * from the character codes 97 + i and 97 + j, calls {@code forceMerge(1)}, and
 * checks that iterating the terms enum yields those terms in generation order
 * with ordinal {@code 30 * i + j}.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so
 * they are closed (reader, then writer, then directory) even when an assertion
 * fails part-way through.
 *
 * @throws Exception if indexing, reading or closing fails
 */
public void testSeveralNonRootBlocks() throws Exception {
    try (Directory dir = newDirectory();
            IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
        List<String> terms = new ArrayList<>();
        for (int i = 0; i < 30; i++) {
            for (int j = 0; j < 30; j++) {
                String term = "" + (char) (97 + i) + (char) (97 + j);
                terms.add(term);
                if (VERBOSE) {
                    System.out.println("term=" + term);
                }
                Document doc = new Document();
                doc.add(newTextField("body", term, Field.Store.NO));
                w.addDocument(doc);
            }
        }
        w.forceMerge(1);
        try (IndexReader r = DirectoryReader.open(w)) {
            TermsEnum te = MultiFields.getTerms(r, "body").iterator();
            // Walk the enum sequentially and compare each term and ordinal.
            for (int i = 0; i < 30; i++) {
                for (int j = 0; j < 30; j++) {
                    String term = "" + (char) (97 + i) + (char) (97 + j);
                    if (VERBOSE) {
                        System.out.println("TEST: check term=" + term);
                    }
                    assertEquals(term, te.next().utf8ToString());
                    assertEquals(30 * i + j, te.ord());
                }
            }
            testEnum(te, terms);
            // Seeking back to ordinal 0 must return the first term.
            te.seekExact(0);
            assertEquals("aa", te.term().utf8ToString());
        }
    }
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) ArrayList(java.util.ArrayList) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 69 with TermsEnum

use of org.apache.lucene.index.TermsEnum in project lucene-solr by apache.

the class TestOrdsBlockTree method testTwoBlocks.

/**
 * Indexes 36 one-token documents for each of the prefixes "" and "m" (the
 * token is the prefix followed by the character with code 97 + i), calls
 * {@code forceMerge(1)}, and then checks the "field" terms enum: two fixed
 * lookups, a reverse sweep over every ordinal, and a randomized mix of seeks
 * by ordinal and by term.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so
 * they are closed (reader, then writer, then directory) even when an assertion
 * fails part-way through. The result of {@code seekExact(term)} in the random
 * loop is asserted so a missed seek is reported at the seek itself.
 *
 * @throws Exception if indexing, reading or closing fails
 */
public void testTwoBlocks() throws Exception {
    try (Directory dir = newDirectory();
            RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        List<String> terms = new ArrayList<>();
        // One pass per prefix, in the same order the documents were always added.
        for (String prefix : new String[] { "", "m" }) {
            for (int i = 0; i < 36; i++) {
                String term = prefix + (char) (97 + i);
                terms.add(term);
                if (VERBOSE) {
                    System.out.println("i=" + i + " term=" + term);
                }
                Document doc = new Document();
                doc.add(newTextField("field", term, Field.Store.NO));
                w.addDocument(doc);
            }
        }
        if (VERBOSE) {
            System.out.println("TEST: now forceMerge");
        }
        w.forceMerge(1);
        try (IndexReader r = w.getReader()) {
            TermsEnum te = MultiFields.getTerms(r, "field").iterator();
            assertTrue(te.seekExact(new BytesRef("mo")));
            assertEquals(27, te.ord());
            te.seekExact(54);
            assertEquals(new BytesRef("s"), te.term());

            // After sorting, the list index of a term is the ordinal the enum is expected to report.
            Collections.sort(terms);
            for (int i = terms.size() - 1; i >= 0; i--) {
                te.seekExact(i);
                assertEquals(i, te.ord());
                assertEquals(terms.get(i), te.term().utf8ToString());
            }

            int iters = atLeast(1000);
            for (int iter = 0; iter < iters; iter++) {
                int ord = random().nextInt(terms.size());
                BytesRef term = new BytesRef(terms.get(ord));
                if (random().nextBoolean()) {
                    if (VERBOSE) {
                        System.out.println("TEST: iter=" + iter + " seek to ord=" + ord + " of " + terms.size());
                    }
                    te.seekExact(ord);
                } else {
                    if (VERBOSE) {
                        System.out.println("TEST: iter=" + iter + " seek to term=" + terms.get(ord) + " ord=" + ord + " of " + terms.size());
                    }
                    // Every term in the list was indexed above, so the seek must succeed.
                    assertTrue(te.seekExact(term));
                }
                assertEquals(ord, te.ord());
                assertEquals(term, te.term());
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 70 with TermsEnum

use of org.apache.lucene.index.TermsEnum in project lucene-solr by apache.

the class TestOrdsBlockTree method testSeekCeilNotFound.

/**
 * Indexes one document whose "field" value is the empty string, then 36
 * documents each carrying two tokens (the character with code 97 + i, and
 * that character prefixed with "a"), calls {@code forceMerge(1)}, and checks
 * that {@code seekCeil} on the single byte 0x22 reports {@code NOT_FOUND} and
 * leaves the enum on "a" at ordinal 1.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so
 * they are closed (reader, then writer, then directory) even when an assertion
 * fails part-way through.
 *
 * @throws Exception if indexing, reading or closing fails
 */
public void testSeekCeilNotFound() throws Exception {
    try (Directory dir = newDirectory();
            RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        // Get empty string in there!
        Document emptyDoc = new Document();
        emptyDoc.add(newStringField("field", "", Field.Store.NO));
        w.addDocument(emptyDoc);
        for (int i = 0; i < 36; i++) {
            String term = "" + (char) (97 + i);
            String term2 = "a" + (char) (97 + i);
            Document doc = new Document();
            doc.add(newTextField("field", term + " " + term2, Field.Store.NO));
            w.addDocument(doc);
        }
        w.forceMerge(1);
        try (IndexReader r = w.getReader()) {
            TermsEnum te = MultiFields.getTerms(r, "field").iterator();
            // 0x22 was never indexed, so the ceiling seek is expected to land on the next term, "a".
            assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef(new byte[] { 0x22 })));
            assertEquals("a", te.term().utf8ToString());
            assertEquals(1L, te.ord());
        }
    }
}
Also used : IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) TermsEnum(org.apache.lucene.index.TermsEnum)

Aggregations

TermsEnum (org.apache.lucene.index.TermsEnum): 155; BytesRef (org.apache.lucene.util.BytesRef): 116; Terms (org.apache.lucene.index.Terms): 103; PostingsEnum (org.apache.lucene.index.PostingsEnum): 52; ArrayList (java.util.ArrayList): 31; Term (org.apache.lucene.index.Term): 31; IndexReader (org.apache.lucene.index.IndexReader): 29; LeafReader (org.apache.lucene.index.LeafReader): 28; IOException (java.io.IOException): 26; Fields (org.apache.lucene.index.Fields): 26; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 25; Document (org.apache.lucene.document.Document): 24; Directory (org.apache.lucene.store.Directory): 24; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 19; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 18; HashMap (java.util.HashMap): 12; HashSet (java.util.HashSet): 11; DirectoryReader (org.apache.lucene.index.DirectoryReader): 11; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 10; Bits (org.apache.lucene.util.Bits): 10