Use of org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings in the lucene-solr project by Apache.
In class RandomPostingsTester, method testTermsOneThread.
/**
 * Single-threaded check of term lookup and {@code Terms.intersect} against {@code fieldsSource}.
 *
 * <p>Phase one shuffles {@code allTerms} and visits every entry, positioning a fresh
 * {@code TermsEnum} either by term bytes, by ordinal (until an enum reports that ords are
 * unsupported), or through a previously saved {@code TermState}; the positioned enum is then
 * handed to {@code verifyEnum}. Phase two intersects every field in {@code fields} with a random
 * automaton and compares the terms returned with the terms the automaton is expected to match.
 *
 * @param random source of randomness for every choice made here
 * @param fieldsSource the fields under test
 * @param options enabled test options; {@code Option.TERM_STATE} gates saving of term states
 * @param maxTestOptions passed through to {@code verifyEnum}
 * @param maxIndexOptions passed through to {@code verifyEnum}
 * @param alwaysTestMax passed through to {@code verifyEnum}; when true the "same term again"
 *     re-check runs on every term instead of randomly
 * @throws IOException propagated from the terms/enum calls made on {@code fieldsSource}
 */
private void testTermsOneThread(Random random, Fields fieldsSource, EnumSet<Option> options, IndexOptions maxTestOptions, IndexOptions maxIndexOptions, boolean alwaysTestMax) throws IOException {
  ThreadState threadState = new ThreadState();

  // Test random terms/fields:
  // termStates and termStateTerms are parallel lists: entry i of one belongs to entry i of the other.
  List<TermState> termStates = new ArrayList<>();
  List<FieldAndTerm> termStateTerms = new ArrayList<>();

  // Flipped to false the first time an enum throws UnsupportedOperationException for an ord call.
  boolean supportsOrds = true;

  Collections.shuffle(allTerms, random);
  int upto = 0;
  while (upto < allTerms.size()) {
    boolean useTermState = termStates.size() != 0 && random.nextInt(5) == 1;
    boolean useTermOrd = supportsOrds && useTermState == false && random.nextInt(5) == 1;

    FieldAndTerm fieldAndTerm;
    TermsEnum termsEnum;
    TermState termState = null;

    if (!useTermState) {
      // Seek by random field+term:
      fieldAndTerm = allTerms.get(upto++);
      if (LuceneTestCase.VERBOSE) {
        if (useTermOrd) {
          System.out.println("\nTEST: seek to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString() + " using ord=" + fieldAndTerm.ord);
        } else {
          System.out.println("\nTEST: seek to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString());
        }
      }
    } else {
      // Seek by previous saved TermState (does not consume an entry of allTerms):
      int idx = random.nextInt(termStates.size());
      fieldAndTerm = termStateTerms.get(idx);
      if (LuceneTestCase.VERBOSE) {
        System.out.println("\nTEST: seek using TermState to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString());
      }
      termState = termStates.get(idx);
    }

    Terms terms = fieldsSource.terms(fieldAndTerm.field);
    assertNotNull(terms);
    termsEnum = terms.iterator();

    if (!useTermState) {
      if (useTermOrd) {
        // Try seek by ord sometimes:
        try {
          termsEnum.seekExact(fieldAndTerm.ord);
        } catch (UnsupportedOperationException uoe) {
          // Ords are not available here: remember that and fall back to seeking by term.
          supportsOrds = false;
          assertTrue(termsEnum.seekExact(fieldAndTerm.term));
        }
      } else {
        assertTrue(termsEnum.seekExact(fieldAndTerm.term));
      }
    } else {
      termsEnum.seekExact(fieldAndTerm.term, termState);
    }

    // check we really seeked to the right place
    assertEquals(fieldAndTerm.term, termsEnum.term());

    long termOrd;
    if (supportsOrds) {
      try {
        termOrd = termsEnum.ord();
      } catch (UnsupportedOperationException uoe) {
        supportsOrds = false;
        termOrd = -1;
      }
    } else {
      termOrd = -1;
    }

    if (termOrd != -1) {
      // PostingsFormat supports ords
      assertEquals(fieldAndTerm.ord, termOrd);
    }

    boolean savedTermState = false;

    if (options.contains(Option.TERM_STATE) && !useTermState && random.nextInt(5) == 1) {
      // Save away this TermState:
      termStates.add(termsEnum.termState());
      termStateTerms.add(fieldAndTerm);
      savedTermState = true;
    }

    verifyEnum(random, threadState, fieldAndTerm.field, fieldAndTerm.term, termsEnum, maxTestOptions, maxIndexOptions, options, alwaysTestMax);

    // Sometimes save term state after pulling the enum:
    if (options.contains(Option.TERM_STATE) && !useTermState && !savedTermState && random.nextInt(5) == 1) {
      // Save away this TermState:
      termStates.add(termsEnum.termState());
      termStateTerms.add(fieldAndTerm);
    }

    // Sometimes (or always, when alwaysTestMax is set) verify the enum a second time
    // from the same term:
    if (alwaysTestMax || random.nextInt(10) == 7) {
      // Try same term again
      if (LuceneTestCase.VERBOSE) {
        System.out.println("TEST: try enum again on same term");
      }
      verifyEnum(random, threadState, fieldAndTerm.field, fieldAndTerm.term, termsEnum, maxTestOptions, maxIndexOptions, options, alwaysTestMax);
    }
  }

  // Test Terms.intersect:
  for (String field : fields.keySet()) {
    // Same guard as in the seek phase: fail with an assertion rather than an NPE on a missing field.
    Terms fieldTerms = fieldsSource.terms(field);
    assertNotNull(fieldTerms);

    while (true) {
      Automaton a = AutomatonTestUtil.randomAutomaton(random);
      CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
      if (ca.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
        // Keep retrying until we get an A that will really "use" the PF's intersect code:
        continue;
      }
      // System.out.println("A:\n" + a.toDot());

      BytesRef startTerm = null;
      if (random.nextBoolean()) {
        RandomAcceptedStrings ras = new RandomAcceptedStrings(a);
        // Up to 100 attempts to draw a non-empty accepted string to use as the start term.
        for (int iter = 0; iter < 100; iter++) {
          int[] codePoints = ras.getRandomAcceptedString(random);
          if (codePoints.length == 0) {
            continue;
          }
          startTerm = new BytesRef(UnicodeUtil.newString(codePoints, 0, codePoints.length));
          break;
        }
        // Don't allow empty string startTerm:
        if (startTerm == null) {
          continue;
        }
      }

      TermsEnum intersected = fieldTerms.intersect(ca, startTerm);

      Set<BytesRef> intersectedTerms = new HashSet<>();
      BytesRef term;
      while ((term = intersected.next()) != null) {
        if (startTerm != null) {
          // NOTE: not <=
          assertTrue(startTerm.compareTo(term) < 0);
        }
        // Copy before storing: the set must not hold on to the BytesRef handed out by next().
        intersectedTerms.add(BytesRef.deepCopyOf(term));
        verifyEnum(random, threadState, field, term, intersected, maxTestOptions, maxIndexOptions, options, alwaysTestMax);
      }

      if (ca.runAutomaton == null) {
        assertTrue(intersectedTerms.isEmpty());
      } else {
        // Every known term of the field must be present exactly when it sorts after startTerm
        // (if any) and the automaton accepts it.
        for (BytesRef term2 : fields.get(field).keySet()) {
          boolean expected;
          if (startTerm != null && startTerm.compareTo(term2) >= 0) {
            expected = false;
          } else {
            expected = ca.runAutomaton.run(term2.bytes, term2.offset, term2.length);
          }
          assertEquals("term=" + term2, expected, intersectedTerms.contains(term2));
        }
      }

      break;
    }
  }
}
Use of org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings in the lucene-solr project by Apache.
In class TestAutomaton, method testRandomFinite.
/**
 * Randomized test over finite automata: starts from the union of a few random terms and then,
 * for a number of iterations, applies one randomly chosen operation to the automaton
 * {@code a} while applying the matching change to the explicit term set {@code terms}.
 *
 * <p>After every operation the pair is checked with {@code assertSame(terms, a)} and the
 * automaton's {@code isDeterministic()} flag is compared with
 * {@code AutomatonTestUtil.isDeterministicSlow(a)}.
 *
 * @throws Exception propagated from the automaton operations or the helper checks
 */
public void testRandomFinite() throws Exception {

  int numTerms = atLeast(10);
  int iters = atLeast(100);

  if (VERBOSE) {
    System.out.println("TEST: numTerms=" + numTerms + " iters=" + iters);
  }

  Set<BytesRef> terms = new HashSet<>();
  while (terms.size() < numTerms) {
    terms.add(new BytesRef(getRandomString()));
  }

  Automaton a = unionTerms(terms);
  assertSame(terms, a);

  for (int iter = 0; iter < iters; iter++) {
    if (VERBOSE) {
      System.out.println("TEST: iter=" + iter + " numTerms=" + terms.size() + " a.numStates=" + a.getNumStates());
      /*
      System.out.println(" terms:");
      for(BytesRef term : terms) {
        System.out.println(" " + term);
      }
      */
    }
    switch(random().nextInt(15)) {

      case 0:
        // concatenate prefix
        {
          if (VERBOSE) {
            System.out.println(" op=concat prefix");
          }
          Set<BytesRef> newTerms = new HashSet<>();
          BytesRef prefix = new BytesRef(getRandomString());
          BytesRefBuilder newTerm = new BytesRefBuilder();
          for (BytesRef term : terms) {
            newTerm.copyBytes(prefix);
            newTerm.append(term);
            newTerms.add(newTerm.toBytesRef());
          }
          terms = newTerms;
          // Prepending a single string is expected to leave the determinism flag unchanged.
          boolean wasDeterministic1 = a.isDeterministic();
          a = Operations.concatenate(Automata.makeString(prefix.utf8ToString()), a);
          assertEquals(wasDeterministic1, a.isDeterministic());
        }
        break;

      case 1:
        // concatenate suffix
        {
          BytesRef suffix = new BytesRef(getRandomString());
          if (VERBOSE) {
            System.out.println(" op=concat suffix " + suffix);
          }
          Set<BytesRef> newTerms = new HashSet<>();
          BytesRefBuilder newTerm = new BytesRefBuilder();
          for (BytesRef term : terms) {
            newTerm.copyBytes(term);
            newTerm.append(suffix);
            newTerms.add(newTerm.toBytesRef());
          }
          terms = newTerms;
          a = Operations.concatenate(a, Automata.makeString(suffix.utf8ToString()));
        }
        break;

      case 2:
        // determinize
        if (VERBOSE) {
          System.out.println(" op=determinize");
        }
        a = Operations.determinize(a, Integer.MAX_VALUE);
        assertTrue(a.isDeterministic());
        break;

      case 3:
        if (a.getNumStates() < 100) {
          if (VERBOSE) {
            System.out.println(" op=minimize");
          }
          // minimize
          a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
        } else if (VERBOSE) {
          System.out.println(" skip op=minimize: too many states (" + a.getNumStates() + ")");
        }
        break;

      case 4:
        // union
        {
          if (VERBOSE) {
            System.out.println(" op=union");
          }
          Set<BytesRef> newTerms = new HashSet<>();
          int numNewTerms = random().nextInt(5);
          while (newTerms.size() < numNewTerms) {
            newTerms.add(new BytesRef(getRandomString()));
          }
          terms.addAll(newTerms);
          Automaton newA = unionTerms(newTerms);
          a = Operations.union(a, newA);
        }
        break;

      case 5:
        // optional
        {
          if (VERBOSE) {
            System.out.println(" op=optional");
          }
          // NOTE: This can add a dead state:
          a = Operations.optional(a);
          terms.add(new BytesRef());
        }
        break;

      case 6:
        // minus finite
        {
          if (VERBOSE) {
            System.out.println(" op=minus finite");
          }
          if (terms.size() > 0) {
            RandomAcceptedStrings rasl = new RandomAcceptedStrings(Operations.removeDeadStates(a));
            Set<BytesRef> toRemove = new HashSet<>();
            // Remove between 1 and roughly half of the current terms.
            int numToRemove = TestUtil.nextInt(random(), 1, (terms.size() + 1) / 2);
            while (toRemove.size() < numToRemove) {
              int[] ints = rasl.getRandomAcceptedString(random());
              // Set.add already ignores duplicates, so no separate contains() check is needed.
              toRemove.add(new BytesRef(UnicodeUtil.newString(ints, 0, ints.length)));
            }
            for (BytesRef term : toRemove) {
              boolean removed = terms.remove(term);
              assertTrue(removed);
            }
            Automaton a2 = unionTerms(toRemove);
            a = Operations.minus(a, a2, Integer.MAX_VALUE);
          }
        }
        break;

      case 7:
        {
          // minus infinite
          List<Automaton> as = new ArrayList<>();
          int count = TestUtil.nextInt(random(), 1, 5);
          Set<Integer> prefixes = new HashSet<>();
          while (prefixes.size() < count) {
            // prefix is a leading ascii byte; we remove <prefix>* from a
            int prefix = random().nextInt(128);
            prefixes.add(prefix);
          }

          if (VERBOSE) {
            System.out.println(" op=minus infinite prefixes=" + prefixes);
          }

          for (int prefix : prefixes) {
            // Two states: init --prefix--> state, with state accepting and looping on any code point.
            Automaton a2 = new Automaton();
            int init = a2.createState();
            int state = a2.createState();
            a2.addTransition(init, state, prefix);
            a2.setAccept(state, true);
            a2.addTransition(state, state, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
            a2.finishState();
            as.add(a2);
            // Mirror the subtraction on the expected set: drop terms whose first byte is prefix.
            Iterator<BytesRef> it = terms.iterator();
            while (it.hasNext()) {
              BytesRef term = it.next();
              if (term.length > 0 && (term.bytes[term.offset] & 0xFF) == prefix) {
                it.remove();
              }
            }
          }
          Automaton a2 = randomNoOp(Operations.union(as));
          a = Operations.minus(a, a2, DEFAULT_MAX_DETERMINIZED_STATES);
        }
        break;

      case 8:
        {
          int count = TestUtil.nextInt(random(), 10, 20);
          if (VERBOSE) {
            System.out.println(" op=intersect infinite count=" + count);
          }
          // intersect infinite
          List<Automaton> as = new ArrayList<>();

          Set<Integer> prefixes = new HashSet<>();
          while (prefixes.size() < count) {
            int prefix = random().nextInt(128);
            prefixes.add(prefix);
          }
          if (VERBOSE) {
            System.out.println(" prefixes=" + prefixes);
          }

          // NOTE: prefixes is only read here; it must not be modified while being iterated.
          for (int prefix : prefixes) {
            // prefix is a leading ascii byte; we retain <prefix>* in a
            Automaton a2 = new Automaton();
            int init = a2.createState();
            int state = a2.createState();
            a2.addTransition(init, state, prefix);
            a2.setAccept(state, true);
            a2.addTransition(state, state, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
            a2.finishState();
            as.add(a2);
          }

          Automaton a2 = Operations.union(as);
          if (random().nextBoolean()) {
            a2 = Operations.determinize(a2, DEFAULT_MAX_DETERMINIZED_STATES);
          } else if (random().nextBoolean()) {
            a2 = MinimizationOperations.minimize(a2, DEFAULT_MAX_DETERMINIZED_STATES);
          }
          a = Operations.intersection(a, a2);

          // Mirror the intersection: keep only non-empty terms whose first byte is a chosen prefix.
          Iterator<BytesRef> it = terms.iterator();
          while (it.hasNext()) {
            BytesRef term = it.next();
            if (term.length == 0 || prefixes.contains(term.bytes[term.offset] & 0xff) == false) {
              if (VERBOSE) {
                System.out.println(" drop term=" + term);
              }
              it.remove();
            } else {
              if (VERBOSE) {
                System.out.println(" keep term=" + term);
              }
            }
          }
        }
        break;

      case 9:
        // reverse
        {
          if (VERBOSE) {
            System.out.println(" op=reverse");
          }
          a = Operations.reverse(a);
          Set<BytesRef> newTerms = new HashSet<>();
          for (BytesRef term : terms) {
            newTerms.add(new BytesRef(new StringBuilder(term.utf8ToString()).reverse().toString()));
          }
          terms = newTerms;
        }
        break;

      case 10:
        if (VERBOSE) {
          System.out.println(" op=randomNoOp");
        }
        a = randomNoOp(a);
        break;

      case 11:
        // interval
        {
          int min = random().nextInt(1000);
          int max = min + random().nextInt(50);
          // digits must be non-zero else we make cycle
          int digits = Integer.toString(max).length();
          if (VERBOSE) {
            System.out.println(" op=union interval min=" + min + " max=" + max + " digits=" + digits);
          }
          a = Operations.union(a, Automata.makeDecimalInterval(min, max, digits));
          // A run of `digits` zeros, used below to left-pad shorter numbers.
          StringBuilder b = new StringBuilder();
          for (int i = 0; i < digits; i++) {
            b.append('0');
          }
          String prefix = b.toString();
          for (int i = min; i <= max; i++) {
            String s = Integer.toString(i);
            if (s.length() < digits) {
              // Left-fill with 0s
              s = prefix.substring(s.length()) + s;
            }
            terms.add(new BytesRef(s));
          }
        }
        break;

      case 12:
        if (VERBOSE) {
          System.out.println(" op=remove the empty string");
        }
        a = Operations.minus(a, Automata.makeEmptyString(), DEFAULT_MAX_DETERMINIZED_STATES);
        terms.remove(new BytesRef());
        break;

      case 13:
        if (VERBOSE) {
          System.out.println(" op=add the empty string");
        }
        a = Operations.union(a, Automata.makeEmptyString());
        terms.add(new BytesRef());
        break;

      case 14:
        // Safety in case we are really unlucky w/ the dice:
        if (terms.size() <= numTerms * 3) {
          if (VERBOSE) {
            System.out.println(" op=concat finite automaton");
          }
          int count = random().nextBoolean() ? 2 : 3;
          Set<BytesRef> addTerms = new HashSet<>();
          while (addTerms.size() < count) {
            addTerms.add(new BytesRef(getRandomString()));
          }
          if (VERBOSE) {
            for (BytesRef term : addTerms) {
              System.out.println(" term=" + term);
            }
          }
          Automaton a2 = unionTerms(addTerms);
          Set<BytesRef> newTerms = new HashSet<>();
          if (random().nextBoolean()) {
            // suffix
            if (VERBOSE) {
              System.out.println(" do suffix");
            }
            a = Operations.concatenate(a, randomNoOp(a2));
            BytesRefBuilder newTerm = new BytesRefBuilder();
            // Expected set becomes every (existing term + added term) combination.
            for (BytesRef term : terms) {
              for (BytesRef suffix : addTerms) {
                newTerm.copyBytes(term);
                newTerm.append(suffix);
                newTerms.add(newTerm.toBytesRef());
              }
            }
          } else {
            // prefix
            if (VERBOSE) {
              System.out.println(" do prefix");
            }
            a = Operations.concatenate(randomNoOp(a2), a);
            BytesRefBuilder newTerm = new BytesRefBuilder();
            // Expected set becomes every (added term + existing term) combination.
            for (BytesRef term : terms) {
              for (BytesRef prefix : addTerms) {
                newTerm.copyBytes(prefix);
                newTerm.append(term);
                newTerms.add(newTerm.toBytesRef());
              }
            }
          }

          terms = newTerms;
        }
        break;

      default:
        throw new AssertionError();
    }

    assertSame(terms, a);
    assertEquals(AutomatonTestUtil.isDeterministicSlow(a), a.isDeterministic());

    if (random().nextInt(10) == 7) {
      a = verifyTopoSort(a);
    }
  }

  assertSame(terms, a);
}
Use of org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings in the lucene-solr project by Apache.
In class TestAutomaton, method testReverseRandom2.
/**
 * For a number of random automata, checks that the determinized reversal accepts the reversed
 * form of strings accepted by the original automaton, and that an empty original yields an
 * empty reversal.
 *
 * @throws Exception propagated from the automaton operations
 */
public void testReverseRandom2() throws Exception {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; round++) {
    Automaton original = AutomatonTestUtil.randomAutomaton(random());
    if (random().nextBoolean()) {
      original = Operations.removeDeadStates(original);
    }

    Automaton reversed = Operations.reverse(original);
    Automaton reversedDeterministic = Operations.determinize(reversed, Integer.MAX_VALUE);

    if (Operations.isEmpty(original)) {
      // Nothing to sample from an empty language; the reversal must be empty as well.
      assertTrue(Operations.isEmpty(reversedDeterministic));
    } else {
      RandomAcceptedStrings sampler = new RandomAcceptedStrings(original);
      for (int sample = 0; sample < 20; sample++) {
        // Draw a string the original automaton accepts ...
        int[] codePoints = sampler.getRandomAcceptedString(random());

        // ... flip it in place, walking inwards from both ends ...
        for (int left = 0, right = codePoints.length - 1; left < right; left++, right--) {
          int held = codePoints[left];
          codePoints[left] = codePoints[right];
          codePoints[right] = held;
        }

        // ... and require the reversed automaton to accept the flipped string.
        IntsRef flipped = new IntsRef(codePoints, 0, codePoints.length);
        assertTrue(Operations.run(reversedDeterministic, flipped));
      }
    }
  }
}
Aggregations