Use of org.apache.lucene.util.automaton.CompiledAutomaton in project lucene-solr by apache.
From the class TestBlockPostingsFormat3, method assertTerms.
// following code is almost an exact dup of code from TestDuelingCodecs: sorry!
/**
 * Asserts that two {@link Terms} instances are equivalent: both null or both non-null, same
 * statistics, same enumeration, and same seeking behaviour. When {@code deep} is set, the two
 * instances are additionally compared through intersections with random regular expressions.
 *
 * @param leftTerms first terms instance, may be null
 * @param rightTerms second terms instance, may be null
 * @param deep whether to also run the randomized intersection comparison
 */
public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception {
  if (leftTerms == null || rightTerms == null) {
    // If either side is missing, both sides must be missing.
    assertNull(leftTerms);
    assertNull(rightTerms);
    return;
  }

  assertTermsStatistics(leftTerms, rightTerms);

  // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be
  // different; positions are only compared when both sides actually carry them.
  final boolean positionsOnBothSides = leftTerms.hasPositions() && rightTerms.hasPositions();

  assertTermsEnum(leftTerms.iterator(), rightTerms.iterator(), true, positionsOnBothSides);
  assertTermsSeeking(leftTerms, rightTerms);

  if (!deep) {
    return;
  }

  for (int remaining = atLeast(3); remaining > 0; remaining--) {
    final String regexp = AutomatonTestUtil.randomRegexp(random());
    final CompiledAutomaton compiled =
        new CompiledAutomaton(new RegExp(regexp, RegExp.NONE).toAutomaton());
    if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
      // Only automata compiled to the NORMAL type are intersected here.
      continue;
    }
    // TODO: test start term too
    assertTermsEnum(
        leftTerms.intersect(compiled, null),
        rightTerms.intersect(compiled, null),
        rarely(),
        positionsOnBothSides);
  }
}
Use of org.apache.lucene.util.automaton.CompiledAutomaton in project lucene-solr by apache.
From the class BaseDocValuesFormatTestCase, method testSortedTermsEnum.
/**
 * Indexes three documents whose sorted doc-values field holds "hello", "world" and "beer",
 * force-merges, and then exercises the {@link TermsEnum} obtained from the resulting
 * {@link SortedDocValues}: sequential iteration, {@code seekCeil}, {@code seekExact} by term and
 * by ordinal, and intersection with compiled automata.
 */
public void testSortedTermsEnum() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

  // One document per value, added in deliberately unsorted order.
  for (String value : new String[] {"hello", "world", "beer"}) {
    Document document = new Document();
    document.add(new SortedDocValuesField("field", new BytesRef(value)));
    writer.addDocument(document);
  }
  writer.forceMerge(1);
  DirectoryReader reader = writer.getReader();
  writer.close();

  SortedDocValues values = getOnlyLeafReader(reader).getSortedDocValues("field");
  assertEquals(3, values.getValueCount());

  // Expected terms, indexed by their expected ordinal.
  final String[] byOrd = {"beer", "hello", "world"};

  TermsEnum te = values.termsEnum();

  // next()
  for (int ord = 0; ord < byOrd.length; ord++) {
    assertEquals(byOrd[ord], te.next().utf8ToString());
    assertEquals(ord, te.ord());
  }

  // seekCeil()
  assertEquals(SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("ha!")));
  assertEquals("hello", te.term().utf8ToString());
  assertEquals(1, te.ord());

  assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("beer")));
  assertEquals("beer", te.term().utf8ToString());
  assertEquals(0, te.ord());

  assertEquals(SeekStatus.END, te.seekCeil(new BytesRef("zzz")));

  assertEquals(SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("aba")));
  assertEquals(0, te.ord());

  // seekExact()
  assertTrue(te.seekExact(new BytesRef("beer")));
  assertEquals("beer", te.term().utf8ToString());
  assertEquals(0, te.ord());

  assertTrue(te.seekExact(new BytesRef("hello")));
  // This check reports the default codec in its failure message.
  assertEquals(Codec.getDefault().toString(), "hello", te.term().utf8ToString());
  assertEquals(1, te.ord());

  assertTrue(te.seekExact(new BytesRef("world")));
  assertEquals("world", te.term().utf8ToString());
  assertEquals(2, te.ord());

  assertFalse(te.seekExact(new BytesRef("bogus")));

  // seek(ord)
  for (int ord = 0; ord < byOrd.length; ord++) {
    te.seekExact(ord);
    assertEquals(byOrd[ord], te.term().utf8ToString());
    assertEquals(ord, te.ord());
  }

  // NORMAL automaton
  te = values.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
  assertEquals("hello", te.next().utf8ToString());
  assertEquals(1, te.ord());
  assertEquals("world", te.next().utf8ToString());
  assertEquals(2, te.ord());
  assertNull(te.next());

  // SINGLE automaton
  te = values.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton()));
  assertEquals("hello", te.next().utf8ToString());
  assertEquals(1, te.ord());
  assertNull(te.next());

  reader.close();
  dir.close();
}
Aggregations