Search in sources :

Example 1 with SeekStatus

use of org.apache.lucene.index.TermsEnum.SeekStatus in project lucene-solr by apache.

the class TestLucene54DocValuesFormat method assertEquals.

/**
 * Asserts that {@code actual} exposes the same terms and ordinals as {@code expected}
 * when driven through the same sequence of {@code next()}, {@code seekExact} and
 * {@code seekCeil} calls.
 *
 * @param numOrds  number of ordinals to probe; ordinals {@code 0 .. numOrds - 1} are used
 * @param expected reference enum
 * @param actual   enum under test
 * @throws Exception if any enum operation fails
 */
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
    BytesRef ref;
    // sequential next() through all terms
    while ((ref = expected.next()) != null) {
        assertEquals(ref, actual.next());
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // actual must be exhausted at the same point as expected
    assertNull(actual.next());
    // sequential seekExact(ord) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        actual.seekExact(i);
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // sequential seekExact(BytesRef) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        assertTrue(actual.seekExact(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // sequential seekCeil(BytesRef) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // random seekExact(ord)
    for (long i = 0; i < numOrds; i++) {
        long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
        expected.seekExact(randomOrd);
        actual.seekExact(randomOrd);
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // random seekExact(BytesRef)
    for (long i = 0; i < numOrds; i++) {
        long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
        expected.seekExact(randomOrd);
        // The term comes from expected, so actual must find it too; checking the
        // result keeps the ord()/term() comparisons below from running on an
        // enum whose seek did not succeed.
        assertTrue(actual.seekExact(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // random seekCeil(BytesRef)
    for (long i = 0; i < numOrds; i++) {
        BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
        SeekStatus expectedStatus = expected.seekCeil(target);
        assertEquals(expectedStatus, actual.seekCeil(target));
        // ord/term are only compared when the seek did not run off the end
        if (expectedStatus != SeekStatus.END) {
            assertEquals(expected.ord(), actual.ord());
            assertEquals(expected.term(), actual.term());
        }
    }
}
Also used : SeekStatus(org.apache.lucene.index.TermsEnum.SeekStatus) BytesRef(org.apache.lucene.util.BytesRef)

Example 2 with SeekStatus

use of org.apache.lucene.index.TermsEnum.SeekStatus in project lucene-solr by apache.

the class TestLucene70DocValuesFormat method assertEquals.

/**
 * Drives {@code expected} and {@code actual} through identical sequences of
 * {@code next()}, {@code seekExact} and {@code seekCeil} calls and asserts that
 * both report the same term and ordinal after every step.
 *
 * @param numOrds  number of ordinals to probe; ordinals {@code 0 .. numOrds - 1} are used
 * @param expected reference enum
 * @param actual   enum under test
 * @throws Exception if any enum operation fails
 */
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
    BytesRef ref;
    // sequential next() through all terms
    while ((ref = expected.next()) != null) {
        assertEquals(ref, actual.next());
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // both enums must run out of terms together
    assertNull(actual.next());
    // sequential seekExact(ord) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        actual.seekExact(i);
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // sequential seekExact(BytesRef) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        assertTrue(actual.seekExact(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // sequential seekCeil(BytesRef) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // random seekExact(ord)
    for (long i = 0; i < numOrds; i++) {
        long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
        expected.seekExact(randomOrd);
        actual.seekExact(randomOrd);
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // random seekExact(BytesRef)
    for (long i = 0; i < numOrds; i++) {
        long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
        expected.seekExact(randomOrd);
        // Same check as the sequential seekExact(BytesRef) loop: a term taken
        // from expected has to be found in actual before ord()/term() are compared.
        assertTrue(actual.seekExact(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // random seekCeil(BytesRef)
    for (long i = 0; i < numOrds; i++) {
        BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
        SeekStatus expectedStatus = expected.seekCeil(target);
        assertEquals(expectedStatus, actual.seekCeil(target));
        // skip the position checks when the seek ended past the last term
        if (expectedStatus != SeekStatus.END) {
            assertEquals(expected.ord(), actual.ord());
            assertEquals(expected.term(), actual.term());
        }
    }
}
Also used : SeekStatus(org.apache.lucene.index.TermsEnum.SeekStatus) BytesRef(org.apache.lucene.util.BytesRef)

Example 3 with SeekStatus

use of org.apache.lucene.index.TermsEnum.SeekStatus in project lucene-solr by apache.

the class TestFieldCacheVsDocValues method assertEquals.

/**
 * Verifies that two term enums are interchangeable: after each {@code next()},
 * {@code seekExact} or {@code seekCeil} step applied to both, {@code actual} must
 * report the same term and ordinal as {@code expected}.
 *
 * @param numOrds  number of ordinals to probe; ordinals {@code 0 .. numOrds - 1} are used
 * @param expected reference enum
 * @param actual   enum under test
 * @throws Exception if any enum operation fails
 */
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
    BytesRef ref;
    // sequential next() through all terms
    while ((ref = expected.next()) != null) {
        assertEquals(ref, actual.next());
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // no extra terms may remain in actual
    assertNull(actual.next());
    // sequential seekExact(ord) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        actual.seekExact(i);
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // sequential seekExact(BytesRef) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        assertTrue(actual.seekExact(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // sequential seekCeil(BytesRef) through all terms
    for (long i = 0; i < numOrds; i++) {
        expected.seekExact(i);
        assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // random seekExact(ord)
    for (long i = 0; i < numOrds; i++) {
        long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
        expected.seekExact(randomOrd);
        actual.seekExact(randomOrd);
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // random seekExact(BytesRef)
    for (long i = 0; i < numOrds; i++) {
        long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
        expected.seekExact(randomOrd);
        // The seek result is asserted (as in the sequential loop above) so a
        // failed lookup is reported directly instead of through the
        // ord()/term() comparisons on an enum that was never positioned.
        assertTrue(actual.seekExact(expected.term()));
        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
    }
    // random seekCeil(BytesRef)
    for (long i = 0; i < numOrds; i++) {
        BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
        SeekStatus expectedStatus = expected.seekCeil(target);
        assertEquals(expectedStatus, actual.seekCeil(target));
        // position is only compared when the seek landed on a term
        if (expectedStatus != SeekStatus.END) {
            assertEquals(expected.ord(), actual.ord());
            assertEquals(expected.term(), actual.term());
        }
    }
}
Also used : SeekStatus(org.apache.lucene.index.TermsEnum.SeekStatus) BytesRef(org.apache.lucene.util.BytesRef)

Example 4 with SeekStatus

use of org.apache.lucene.index.TermsEnum.SeekStatus in project lucene-solr by apache.

the class BasePostingsFormatTestCase method testInvertedWrite.

// LUCENE-5123: make sure we can visit postings twice
// during flush/merge
//
// The test wraps the codec's postings format so that every write(Fields)
// call first delegates to the real consumer and then walks the "body"
// field's terms a second time (next(), seekExact, seekCeil), collecting
// per-term docFreq/totalTermFreq for non-merge writes. After indexing,
// the collected stats are compared with what the reader reports.
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();
    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();
    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()
    iwc.setCodec(new FilterCodec(getCodec().getName(), getCodec()) {

        @Override
        public PostingsFormat postingsFormat() {
            final PostingsFormat defaultPostingsFormat = delegate.postingsFormat();
            // Thread that requested the postings format; checked in write() below.
            final Thread mainThread = Thread.currentThread();
            return new PostingsFormat(defaultPostingsFormat.getName()) {

                @Override
                public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
                    final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);
                    return new FieldsConsumer() {

                        @Override
                        public void write(Fields fields) throws IOException {
                            // Let the real consumer write first; everything below
                            // re-reads the same Fields afterwards.
                            fieldsConsumer.write(fields);
                            boolean isMerge = state.context.context == IOContext.Context.MERGE;
                            // Non-merge writes must run on the thread that
                            // called postingsFormat() in this test:
                            assert isMerge || Thread.currentThread() == mainThread;
                            // We iterate the provided TermsEnum
                            // twice, so we exercise this new freedom
                            // with the inverted API; if
                            // addOnSecondPass is true, we add up
                            // term stats on the 2nd iteration:
                            boolean addOnSecondPass = random().nextBoolean();
                            //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);
                            // Gather our own stats:
                            Terms terms = fields.terms("body");
                            assert terms != null;
                            TermsEnum termsEnum = terms.iterator();
                            PostingsEnum docs = null;
                            // First pass: visit every term in order via next().
                            while (termsEnum.next() != null) {
                                BytesRef term = termsEnum.term();
                                // TODO: also sometimes ask for payloads/offsets?
                                boolean noPositions = random().nextBoolean();
                                if (noPositions) {
                                    // reuse the previous PostingsEnum when only freqs are needed
                                    docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                } else {
                                    docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                }
                                // Stats for this term within the Fields being written.
                                int docFreq = 0;
                                long totalTermFreq = 0;
                                while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                    docFreq++;
                                    totalTermFreq += docs.freq();
                                    // Read a random number of positions, between 1 and
                                    // freq(); drawn even when positions are not requested.
                                    int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                    if (!noPositions) {
                                        for (int i = 0; i < limit; i++) {
                                            docs.nextPosition();
                                        }
                                    }
                                }
                                String termString = term.utf8ToString();
                                // During merge we should only see terms
                                // we had already seen during a
                                // previous flush:
                                assertTrue(isMerge == false || termFreqs.containsKey(termString));
                                if (isMerge == false) {
                                    if (addOnSecondPass == false) {
                                        // Accumulate now, creating the entry on first sight.
                                        TermFreqs tf = termFreqs.get(termString);
                                        if (tf == null) {
                                            tf = new TermFreqs();
                                            termFreqs.put(termString, tf);
                                        }
                                        tf.docFreq += docFreq;
                                        tf.totalTermFreq += totalTermFreq;
                                        sumDocFreq.addAndGet(docFreq);
                                        sumTotalTermFreq.addAndGet(totalTermFreq);
                                    } else if (termFreqs.containsKey(termString) == false) {
                                        // Add placeholder (2nd pass will
                                        // set its counts):
                                        termFreqs.put(termString, new TermFreqs());
                                    }
                                }
                            }
                            // Also test seeking the TermsEnum:
                            // (second pass: every known term is looked up with
                            // seekExact; terms absent from this write are skipped)
                            for (String term : termFreqs.keySet()) {
                                if (termsEnum.seekExact(new BytesRef(term))) {
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }
                                    if (isMerge == false && addOnSecondPass) {
                                        // Stats were deferred to this pass; the first
                                        // pass left an entry for every term it saw.
                                        TermFreqs tf = termFreqs.get(term);
                                        assert tf != null;
                                        tf.docFreq += docFreq;
                                        tf.totalTermFreq += totalTermFreq;
                                        sumDocFreq.addAndGet(docFreq);
                                        sumTotalTermFreq.addAndGet(totalTermFreq);
                                    }
                                    //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                    // Counts seen in this single write must not exceed
                                    // the totals accumulated so far for the term.
                                    assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                    assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                }
                            }
                            // Also test seekCeil
                            for (int iter = 0; iter < 10; iter++) {
                                BytesRef term = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
                                SeekStatus status = termsEnum.seekCeil(term);
                                if (status == SeekStatus.NOT_FOUND) {
                                    // the enum must now sit on a term sorting after the target
                                    assertTrue(term.compareTo(termsEnum.term()) < 0);
                                }
                            }
                        }

                        @Override
                        public void close() throws IOException {
                            fieldsConsumer.close();
                        }
                    };
                }

                @Override
                public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                    // reading is not instrumented; use the real producer
                    return defaultPostingsFormat.fieldsProducer(state);
                }
            };
        }
    });
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    LineFileDocs docs = new LineFileDocs(random());
    // Keep adding documents until the summed RamUsageTester size of the
    // added documents reaches this many bytes.
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        // index only the "body" field, the one the wrapped consumer inspects
        Document justBodyDoc = new Document();
        justBodyDoc.add(doc.getField("body"));
        w.addDocument(justBodyDoc);
        bytesIndexed += RamUsageTester.sizeOf(justBodyDoc);
    }
    IndexReader r = w.getReader();
    w.close();
    // Compare the reader's view with the stats gathered during the writes.
    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());
    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                // ord() is not supported by this enum; stop checking ords
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                // ords are expected to match the iteration position
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    // every term recorded during the writes must be visible in the reader, and no others
    assertEquals(termFreqs.size(), termCount);
    r.close();
    dir.close();
}
Also used : FieldsConsumer(org.apache.lucene.codecs.FieldsConsumer) Document(org.apache.lucene.document.Document) FilterCodec(org.apache.lucene.codecs.FilterCodec) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) LineFileDocs(org.apache.lucene.util.LineFileDocs) FieldsProducer(org.apache.lucene.codecs.FieldsProducer) IOException(java.io.IOException) AtomicLong(java.util.concurrent.atomic.AtomicLong) PostingsFormat(org.apache.lucene.codecs.PostingsFormat) SeekStatus(org.apache.lucene.index.TermsEnum.SeekStatus)

Example 5 with SeekStatus

use of org.apache.lucene.index.TermsEnum.SeekStatus in project lucene-solr by apache.

the class TestBlockPostingsFormat3 method assertTermsSeeking.

/**
 * Asserts that {@code seekExact} and {@code seekCeil} give the same results on
 * both {@link Terms} instances for a random sample of targets derived from
 * {@code leftTerms}: exact terms, truncated prefixes of terms, and terms stored
 * at a non-zero offset in their backing array.
 *
 * @param leftTerms  terms the seek targets are sampled from
 * @param rightTerms terms that must answer every seek the same way
 * @throws Exception if iterating or seeking fails
 */
private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception {
    // just an upper bound
    int numTests = atLeast(20);
    Random random = random();
    // collect this number of terms from the left side
    HashSet<BytesRef> tests = new HashSet<>();
    int numPasses = 0;
    // Give up after 10 passes even if fewer than numTests targets were collected.
    while (numPasses < 10 && tests.size() < numTests) {
        TermsEnum sampleEnum = leftTerms.iterator();
        BytesRef term;
        while ((term = sampleEnum.next()) != null) {
            int code = random.nextInt(10);
            if (code == 0) {
                // the term
                tests.add(BytesRef.deepCopyOf(term));
            } else if (code == 1) {
                // truncated subsequence of term
                BytesRef truncated = BytesRef.deepCopyOf(term);
                if (truncated.length > 0) {
                    // truncate it
                    truncated.length = random.nextInt(truncated.length);
                }
                // Previously the truncated copy was built but never added, so
                // prefix targets were never actually tested.
                tests.add(truncated);
            } else if (code == 2) {
                // term, but ensure a non-zero offset
                byte[] newbytes = new byte[term.length + 5];
                System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
                tests.add(new BytesRef(newbytes, 5, term.length));
            }
        }
        numPasses++;
    }
    ArrayList<BytesRef> shuffledTests = new ArrayList<>(tests);
    Collections.shuffle(shuffledTests, random);
    for (BytesRef b : shuffledTests) {
        // fresh enums for every target
        TermsEnum leftEnum = leftTerms.iterator();
        TermsEnum rightEnum = rightTerms.iterator();
        // Each seek is issued twice in a row on the same enums, so a repeated
        // seek to the same target is covered as well.
        assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
        assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
        SeekStatus leftStatus;
        SeekStatus rightStatus;
        leftStatus = leftEnum.seekCeil(b);
        rightStatus = rightEnum.seekCeil(b);
        assertEquals(leftStatus, rightStatus);
        // the current term is only compared when the seek did not hit the end
        if (leftStatus != SeekStatus.END) {
            assertEquals(leftEnum.term(), rightEnum.term());
        }
        leftStatus = leftEnum.seekCeil(b);
        rightStatus = rightEnum.seekCeil(b);
        assertEquals(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END) {
            assertEquals(leftEnum.term(), rightEnum.term());
        }
    }
}
Also used : Random(java.util.Random) ArrayList(java.util.ArrayList) SeekStatus(org.apache.lucene.index.TermsEnum.SeekStatus) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum) HashSet(java.util.HashSet)

Aggregations

SeekStatus (org.apache.lucene.index.TermsEnum.SeekStatus): 6, BytesRef (org.apache.lucene.util.BytesRef): 5, ArrayList (java.util.ArrayList): 2, HashSet (java.util.HashSet): 2, Random (java.util.Random): 2, IOException (java.io.IOException): 1, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1, AtomicLong (java.util.concurrent.atomic.AtomicLong): 1, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 1, FieldsConsumer (org.apache.lucene.codecs.FieldsConsumer): 1, FieldsProducer (org.apache.lucene.codecs.FieldsProducer): 1, FilterCodec (org.apache.lucene.codecs.FilterCodec): 1, PostingsFormat (org.apache.lucene.codecs.PostingsFormat): 1, Document (org.apache.lucene.document.Document): 1, TermsEnum (org.apache.lucene.index.TermsEnum): 1, Directory (org.apache.lucene.store.Directory): 1, LineFileDocs (org.apache.lucene.util.LineFileDocs): 1