use of org.apache.lucene.index.TermsEnum.SeekStatus in project lucene-solr by apache.
the class TestLucene54DocValuesFormat method assertEquals.
/**
 * Asserts that {@code actual} exposes the same terms and ordinals as {@code expected} under
 * every access pattern exercised here: sequential {@code next()}, {@code seekExact(long)},
 * {@code seekExact(BytesRef)} and {@code seekCeil(BytesRef)}, first in order and then at
 * random positions.
 *
 * @param numOrds number of ordinals to exercise; ordinals {@code 0 .. numOrds - 1} are sought
 *     on both enums
 * @param expected the reference enum
 * @param actual the enum being checked against the reference
 * @throws Exception if iterating or seeking either enum fails
 */
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
  BytesRef ref;

  // sequential next() through all terms
  while ((ref = expected.next()) != null) {
    assertEquals(ref, actual.next());
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }
  // actual must be exhausted at the same point as expected
  assertNull(actual.next());

  // sequential seekExact(ord) through all terms
  for (long i = 0; i < numOrds; i++) {
    expected.seekExact(i);
    actual.seekExact(i);
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // sequential seekExact(BytesRef) through all terms
  for (long i = 0; i < numOrds; i++) {
    expected.seekExact(i);
    assertTrue(actual.seekExact(expected.term()));
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // sequential seekCeil(BytesRef) through all terms
  for (long i = 0; i < numOrds; i++) {
    expected.seekExact(i);
    assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // random seekExact(ord)
  for (long i = 0; i < numOrds; i++) {
    long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
    expected.seekExact(randomOrd);
    actual.seekExact(randomOrd);
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // random seekExact(BytesRef)
  for (long i = 0; i < numOrds; i++) {
    long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
    expected.seekExact(randomOrd);
    // The target is a term taken from expected, so the seek must succeed; checking the
    // result keeps this loop as strict as the sequential seekExact(BytesRef) loop above.
    assertTrue(actual.seekExact(expected.term()));
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // random seekCeil(BytesRef)
  for (long i = 0; i < numOrds; i++) {
    BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
    SeekStatus expectedStatus = expected.seekCeil(target);
    assertEquals(expectedStatus, actual.seekCeil(target));
    // ord/term are only compared when the seek did not run off the end
    if (expectedStatus != SeekStatus.END) {
      assertEquals(expected.ord(), actual.ord());
      assertEquals(expected.term(), actual.term());
    }
  }
}
use of org.apache.lucene.index.TermsEnum.SeekStatus in project lucene-solr by apache.
the class TestLucene70DocValuesFormat method assertEquals.
/**
 * Compares two {@link TermsEnum}s term by term and ordinal by ordinal. The enums are walked
 * with {@code next()}, then positioned with {@code seekExact(long)},
 * {@code seekExact(BytesRef)} and {@code seekCeil(BytesRef)}, in sequential order and then
 * at randomly chosen positions; after each step both enums must report the same ord and term.
 *
 * @param numOrds how many ordinals to exercise; ordinals {@code 0 .. numOrds - 1} are used as
 *     seek targets
 * @param expected the reference enum
 * @param actual the enum being checked against the reference
 * @throws Exception if iterating or seeking either enum fails
 */
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
  BytesRef ref;

  // sequential next() through all terms
  while ((ref = expected.next()) != null) {
    assertEquals(ref, actual.next());
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }
  // both enums must run out of terms together
  assertNull(actual.next());

  // sequential seekExact(ord) through all terms
  for (long i = 0; i < numOrds; i++) {
    expected.seekExact(i);
    actual.seekExact(i);
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // sequential seekExact(BytesRef) through all terms
  for (long i = 0; i < numOrds; i++) {
    expected.seekExact(i);
    assertTrue(actual.seekExact(expected.term()));
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // sequential seekCeil(BytesRef) through all terms
  for (long i = 0; i < numOrds; i++) {
    expected.seekExact(i);
    assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // random seekExact(ord)
  for (long i = 0; i < numOrds; i++) {
    long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
    expected.seekExact(randomOrd);
    actual.seekExact(randomOrd);
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // random seekExact(BytesRef)
  for (long i = 0; i < numOrds; i++) {
    long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
    expected.seekExact(randomOrd);
    // The sought term comes from expected, so actual has to find it; previously the
    // returned boolean was ignored here although the sequential loop asserts it.
    assertTrue(actual.seekExact(expected.term()));
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // random seekCeil(BytesRef)
  for (long i = 0; i < numOrds; i++) {
    BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
    SeekStatus expectedStatus = expected.seekCeil(target);
    assertEquals(expectedStatus, actual.seekCeil(target));
    // skip the ord/term comparison when the seek ended past the last term
    if (expectedStatus != SeekStatus.END) {
      assertEquals(expected.ord(), actual.ord());
      assertEquals(expected.term(), actual.term());
    }
  }
}
use of org.apache.lucene.index.TermsEnum.SeekStatus in project lucene-solr by apache.
the class TestFieldCacheVsDocValues method assertEquals.
/**
 * Verifies that {@code actual} and {@code expected} agree on every term and ordinal. Both
 * enums are driven through the same sequence of {@code next()}, {@code seekExact(long)},
 * {@code seekExact(BytesRef)} and {@code seekCeil(BytesRef)} calls (sequential first, then
 * random), and their {@code ord()} and {@code term()} are compared after each call.
 *
 * @param numOrds count of ordinals to exercise; seek targets are ordinals
 *     {@code 0 .. numOrds - 1}
 * @param expected the reference enum
 * @param actual the enum being checked against the reference
 * @throws Exception if iterating or seeking either enum fails
 */
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
  BytesRef ref;

  // sequential next() through all terms
  while ((ref = expected.next()) != null) {
    assertEquals(ref, actual.next());
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }
  // once expected is exhausted, actual must be as well
  assertNull(actual.next());

  // sequential seekExact(ord) through all terms
  for (long i = 0; i < numOrds; i++) {
    expected.seekExact(i);
    actual.seekExact(i);
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // sequential seekExact(BytesRef) through all terms
  for (long i = 0; i < numOrds; i++) {
    expected.seekExact(i);
    assertTrue(actual.seekExact(expected.term()));
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // sequential seekCeil(BytesRef) through all terms
  for (long i = 0; i < numOrds; i++) {
    expected.seekExact(i);
    assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // random seekExact(ord)
  for (long i = 0; i < numOrds; i++) {
    long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
    expected.seekExact(randomOrd);
    actual.seekExact(randomOrd);
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // random seekExact(BytesRef)
  for (long i = 0; i < numOrds; i++) {
    long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
    expected.seekExact(randomOrd);
    // expected is positioned on an existing term, so seeking actual to that term must
    // report success; assert it instead of silently dropping the result.
    assertTrue(actual.seekExact(expected.term()));
    assertEquals(expected.ord(), actual.ord());
    assertEquals(expected.term(), actual.term());
  }

  // random seekCeil(BytesRef)
  for (long i = 0; i < numOrds; i++) {
    BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
    SeekStatus expectedStatus = expected.seekCeil(target);
    assertEquals(expectedStatus, actual.seekCeil(target));
    // ord/term are compared only when the seek did not return END
    if (expectedStatus != SeekStatus.END) {
      assertEquals(expected.ord(), actual.ord());
      assertEquals(expected.term(), actual.term());
    }
  }
}
use of org.apache.lucene.index.TermsEnum.SeekStatus in project lucene-solr by apache.
the class BasePostingsFormatTestCase method testInvertedWrite.
// LUCENE-5123: make sure we can visit postings twice
// during flush/merge
//
// The test wraps the codec's postings format so that every write(Fields) call
// first delegates to the real consumer and then walks the "body" field's terms
// again (by next(), by seekExact() and by seekCeil()), gathering its own
// docFreq/totalTermFreq statistics. After indexing, those statistics are
// compared against what the reader reports.
public void testInvertedWrite() throws Exception {
Directory dir = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
// Must be concurrent because thread(s) can be merging
// while up to one thread flushes, and each of those
// threads iterates over the map while the flushing
// thread might be adding to it:
final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();
// Running totals accumulated by the wrapped consumer; checked against the reader below:
final AtomicLong sumDocFreq = new AtomicLong();
final AtomicLong sumTotalTermFreq = new AtomicLong();
// TODO: would be better to use / delegate to the current
// Codec returned by getCodec()
iwc.setCodec(new FilterCodec(getCodec().getName(), getCodec()) {
@Override
public PostingsFormat postingsFormat() {
final PostingsFormat defaultPostingsFormat = delegate.postingsFormat();
// Remember the thread that called postingsFormat() for the assert in write():
final Thread mainThread = Thread.currentThread();
return new PostingsFormat(defaultPostingsFormat.getName()) {
@Override
public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);
return new FieldsConsumer() {
@Override
public void write(Fields fields) throws IOException {
// Let the real consumer write first; everything below re-visits the same Fields:
fieldsConsumer.write(fields);
boolean isMerge = state.context.context == IOContext.Context.MERGE;
// Outside of a merge, write() is expected to run on the thread
// that called postingsFormat() in this test:
assert isMerge || Thread.currentThread() == mainThread;
// We iterate the provided TermsEnum
// twice, so we exercise this new freedom
// with the inverted API; if
// addOnSecondPass is true, we add up
// term stats on the 2nd iteration:
boolean addOnSecondPass = random().nextBoolean();
//System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);
// Gather our own stats:
Terms terms = fields.terms("body");
assert terms != null;
TermsEnum termsEnum = terms.iterator();
PostingsEnum docs = null;
// First pass: visit every term in order via next()
while (termsEnum.next() != null) {
BytesRef term = termsEnum.term();
// TODO: also sometimes ask for payloads/offsets?
boolean noPositions = random().nextBoolean();
if (noPositions) {
docs = termsEnum.postings(docs, PostingsEnum.FREQS);
} else {
docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
}
// Count documents and sum freq() over this term's postings:
int docFreq = 0;
long totalTermFreq = 0;
while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
docFreq++;
totalTermFreq += docs.freq();
// Number of positions to step through for this doc (between 1 and freq()):
int limit = TestUtil.nextInt(random(), 1, docs.freq());
if (!noPositions) {
for (int i = 0; i < limit; i++) {
docs.nextPosition();
}
}
}
String termString = term.utf8ToString();
// During merge we should only see terms
// we had already seen during a
// previous flush:
assertTrue(isMerge == false || termFreqs.containsKey(termString));
// Stats are only accumulated for non-merge writes:
if (isMerge == false) {
if (addOnSecondPass == false) {
TermFreqs tf = termFreqs.get(termString);
if (tf == null) {
tf = new TermFreqs();
termFreqs.put(termString, tf);
}
tf.docFreq += docFreq;
tf.totalTermFreq += totalTermFreq;
sumDocFreq.addAndGet(docFreq);
sumTotalTermFreq.addAndGet(totalTermFreq);
} else if (termFreqs.containsKey(termString) == false) {
// Add placeholder (2nd pass will
// set its counts):
termFreqs.put(termString, new TermFreqs());
}
}
}
// Also test seeking the TermsEnum:
// (second pass: every term recorded so far is looked up with seekExact;
// terms not present in this Fields instance are skipped)
for (String term : termFreqs.keySet()) {
if (termsEnum.seekExact(new BytesRef(term))) {
// TODO: also sometimes ask for payloads/offsets?
boolean noPositions = random().nextBoolean();
if (noPositions) {
docs = termsEnum.postings(docs, PostingsEnum.FREQS);
} else {
docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
}
// Same counting as in the first pass:
int docFreq = 0;
long totalTermFreq = 0;
while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
docFreq++;
totalTermFreq += docs.freq();
int limit = TestUtil.nextInt(random(), 1, docs.freq());
if (!noPositions) {
for (int i = 0; i < limit; i++) {
docs.nextPosition();
}
}
}
// When stats were deferred to the second pass, add them now:
if (isMerge == false && addOnSecondPass) {
TermFreqs tf = termFreqs.get(term);
assert tf != null;
tf.docFreq += docFreq;
tf.totalTermFreq += totalTermFreq;
sumDocFreq.addAndGet(docFreq);
sumTotalTermFreq.addAndGet(totalTermFreq);
}
//System.out.println(" term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
// What this write saw for the term can never exceed the accumulated totals:
assertTrue(docFreq <= termFreqs.get(term).docFreq);
assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
}
}
// Also test seekCeil
// (10 random targets; on NOT_FOUND the enum must be positioned on a term greater than the target)
for (int iter = 0; iter < 10; iter++) {
BytesRef term = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
SeekStatus status = termsEnum.seekCeil(term);
if (status == SeekStatus.NOT_FOUND) {
assertTrue(term.compareTo(termsEnum.term()) < 0);
}
}
}
@Override
public void close() throws IOException {
fieldsConsumer.close();
}
};
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
// Reading is not intercepted; delegate straight to the wrapped format:
return defaultPostingsFormat.fieldsProducer(state);
}
};
}
});
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
LineFileDocs docs = new LineFileDocs(random());
int bytesToIndex = atLeast(100) * 1024;
int bytesIndexed = 0;
// Index documents holding only the "body" field until the size budget is reached;
// progress is measured with RamUsageTester.sizeOf on each added document:
while (bytesIndexed < bytesToIndex) {
Document doc = docs.nextDoc();
Document justBodyDoc = new Document();
justBodyDoc.add(doc.getField("body"));
w.addDocument(justBodyDoc);
bytesIndexed += RamUsageTester.sizeOf(justBodyDoc);
}
IndexReader r = w.getReader();
w.close();
// Compare the stats gathered by the wrapped consumer with what the reader reports:
Terms terms = MultiFields.getTerms(r, "body");
assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());
TermsEnum termsEnum = terms.iterator();
long termCount = 0;
boolean supportsOrds = true;
while (termsEnum.next() != null) {
BytesRef term = termsEnum.term();
assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
// ord() may throw UnsupportedOperationException; once it does, ords are
// no longer checked for the remaining terms:
if (supportsOrds) {
long ord;
try {
ord = termsEnum.ord();
} catch (UnsupportedOperationException uoe) {
supportsOrds = false;
ord = -1;
}
if (ord != -1) {
// Ords are expected to match the iteration index:
assertEquals(termCount, ord);
}
}
termCount++;
}
// The reader must expose exactly the terms the consumer recorded:
assertEquals(termFreqs.size(), termCount);
r.close();
dir.close();
}
use of org.apache.lucene.index.TermsEnum.SeekStatus in project lucene-solr by apache.
the class TestBlockPostingsFormat3 method assertTermsSeeking.
/**
 * Checks that seeking behaves the same on {@code leftTerms} and {@code rightTerms}.
 *
 * <p>Seek targets are sampled from the terms of {@code leftTerms}: each term has a 1-in-10
 * chance of being used as is, a 1-in-10 chance of being used as a randomly truncated prefix,
 * and a 1-in-10 chance of being used as a copy stored at a non-zero offset in its backing
 * array. Every target is then sought twice with {@code seekExact} and twice with
 * {@code seekCeil} on fresh enums from both sides, and the results must match.
 *
 * @param leftTerms terms the seek targets are sampled from
 * @param rightTerms terms whose seek results must match those of {@code leftTerms}
 * @throws Exception if iterating or seeking either side fails
 */
private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception {
  // just an upper bound
  int numTests = atLeast(20);
  Random random = random();

  // collect this number of terms from the left side
  HashSet<BytesRef> tests = new HashSet<>();
  int numPasses = 0;
  while (numPasses < 10 && tests.size() < numTests) {
    TermsEnum sampler = leftTerms.iterator();
    BytesRef term;
    while ((term = sampler.next()) != null) {
      int code = random.nextInt(10);
      if (code == 0) {
        // the term
        tests.add(BytesRef.deepCopyOf(term));
      } else if (code == 1) {
        // truncated subsequence of term
        BytesRef truncated = BytesRef.deepCopyOf(term);
        if (truncated.length > 0) {
          // truncate it
          truncated.length = random.nextInt(truncated.length);
        }
        // Previously the truncated copy was built and then dropped, so prefix targets
        // were never sought; add it (after mutation, so its hash is stable in the set).
        tests.add(truncated);
      } else if (code == 2) {
        // term, but ensure a non-zero offset
        byte[] newbytes = new byte[term.length + 5];
        System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
        tests.add(new BytesRef(newbytes, 5, term.length));
      }
    }
    numPasses++;
  }

  ArrayList<BytesRef> shuffledTests = new ArrayList<>(tests);
  Collections.shuffle(shuffledTests, random);

  for (BytesRef b : shuffledTests) {
    // fresh enums for every target
    TermsEnum leftEnum = leftTerms.iterator();
    TermsEnum rightEnum = rightTerms.iterator();

    // seekExact is issued twice so a seek from an already-positioned enum is covered too
    assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
    assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));

    // likewise seekCeil twice; term() is only compared when the seek did not return END
    SeekStatus leftStatus = leftEnum.seekCeil(b);
    SeekStatus rightStatus = rightEnum.seekCeil(b);
    assertEquals(leftStatus, rightStatus);
    if (leftStatus != SeekStatus.END) {
      assertEquals(leftEnum.term(), rightEnum.term());
    }

    leftStatus = leftEnum.seekCeil(b);
    rightStatus = rightEnum.seekCeil(b);
    assertEquals(leftStatus, rightStatus);
    if (leftStatus != SeekStatus.END) {
      assertEquals(leftEnum.term(), rightEnum.term());
    }
  }
}
Aggregations