use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class FiniteStringsIteratorTest method testSingletonNoLimit.
/**
 * Enumerates the finite strings of an automaton built from the single string
 * "foobar" and checks that exactly one string comes back, equal to the
 * {@code Util.toUTF32} conversion of that same input.
 */
public void testSingletonNoLimit() {
  final String input = "foobar";
  List<IntsRef> found = getFiniteStrings(new FiniteStringsIterator(Automata.makeString(input)));
  assertEquals(1, found.size());

  // Build the expected value from the same literal that fed the automaton.
  IntsRefBuilder expected = new IntsRefBuilder();
  Util.toUTF32(input.toCharArray(), 0, input.length(), expected);
  assertTrue(found.contains(expected.get()));
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class FiniteStringsIteratorTest method testRandomFiniteStrings1.
/**
 * Unions one automaton per distinct random simple string, randomly applies
 * minimize / determinize / removeDeadStates (or nothing), and then checks that
 * the strings enumerated by {@code FiniteStringsIterator} are exactly the
 * strings that were added. On mismatch, both sorted lists are printed side by
 * side before the test fails.
 */
public void testRandomFiniteStrings1() {
  final int numStrings = atLeast(100);
  if (VERBOSE) {
    System.out.println("TEST: numStrings=" + numStrings);
  }

  Set<IntsRef> expected = new HashSet<>();
  List<Automaton> perString = new ArrayList<>();
  IntsRefBuilder scratch = new IntsRefBuilder();
  for (int n = 0; n < numStrings; n++) {
    String candidate = TestUtil.randomSimpleString(random(), 1, 200);
    Util.toUTF32(candidate.toCharArray(), 0, candidate.length(), scratch);
    if (!expected.add(scratch.toIntsRef())) {
      // Already generated this string; do not add a second automaton for it.
      continue;
    }
    perString.add(Automata.makeString(candidate));
    if (VERBOSE) {
      System.out.println(" add string=" + candidate);
    }
  }

  // TODO: we could sometimes use
  // DaciukMihovAutomatonBuilder here
  // TODO: what other random things can we do here...
  Automaton union = Operations.union(perString);

  // The order of the random draws below is significant for seed reproducibility.
  if (random().nextBoolean()) {
    union = MinimizationOperations.minimize(union, 1000000);
    if (VERBOSE) {
      System.out.println("TEST: a.minimize numStates=" + union.getNumStates());
    }
  } else if (random().nextBoolean()) {
    if (VERBOSE) {
      System.out.println("TEST: a.determinize");
    }
    union = Operations.determinize(union, 1000000);
  } else if (random().nextBoolean()) {
    if (VERBOSE) {
      System.out.println("TEST: a.removeDeadStates");
    }
    union = Operations.removeDeadStates(union);
  }

  List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(union));
  assertFiniteStringsRecursive(union, actual);

  if (expected.equals(new HashSet<>(actual))) {
    return;
  }

  // Mismatch: dump both sides in sorted order to make the difference visible.
  System.out.println("strings.size()=" + expected.size() + " actual.size=" + actual.size());
  List<IntsRef> sortedExpected = new ArrayList<>(expected);
  Collections.sort(sortedExpected);
  List<IntsRef> sortedActual = new ArrayList<>(actual);
  Collections.sort(sortedActual);
  final int limit = Math.min(sortedExpected.size(), sortedActual.size());
  for (int k = 0; k < limit; k++) {
    System.out.println(" i=" + k + " string=" + toString(sortedExpected.get(k)) + " actual=" + toString(sortedActual.get(k)));
  }
  fail("wrong strings found");
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class TestAutomaton method testMakeBinaryIntervalRandom.
/**
 * For random binary intervals, probes 500 random terms each and checks that
 * running the interval automaton on a term agrees with a direct comparison of
 * the term against the min/max bounds and their inclusiveness flags.
 */
public void testMakeBinaryIntervalRandom() throws Exception {
  final int iters = atLeast(100);
  for (int round = 0; round < iters; round++) {
    // Draw order (min term, min flag, max term, max flag) is kept for seed reproducibility.
    BytesRef minTerm = TestUtil.randomBinaryTerm(random());
    boolean minInclusive = random().nextBoolean();
    BytesRef maxTerm = TestUtil.randomBinaryTerm(random());
    boolean maxInclusive = random().nextBoolean();
    Automaton interval = makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive);

    for (int probe = 0; probe < 500; probe++) {
      BytesRef term = TestUtil.randomBinaryTerm(random());
      int minCmp = minTerm.compareTo(term);
      int maxCmp = maxTerm.compareTo(term);

      // A term is inside the interval when it passes both bounds; a term equal
      // to a bound passes only if that bound is inclusive.
      boolean passesMin = minCmp < 0 || (minCmp == 0 && minInclusive);
      boolean passesMax = maxCmp > 0 || (maxCmp == 0 && maxInclusive);
      boolean expected = passesMin && passesMax;

      if (VERBOSE) {
        System.out.println(" check term=" + term + " expected=" + expected);
      }
      IntsRefBuilder termInts = new IntsRefBuilder();
      Util.toIntsRef(term, termInts);
      assertEquals(expected, Operations.run(interval, termInts.toIntsRef()));
    }
  }
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class Operations method getSingleton.
/**
 * Returns the one input accepted by a deterministic automaton, or null if no
 * single input can be established.
 *
 * <p>Walks from state 0, following transitions for as long as the current state
 * is non-accepting and has exactly one outgoing transition whose label is a
 * single value ({@code min == max}) leading to a state not visited before.
 * The walk succeeds only if it stops at an accept state with no outgoing
 * transitions; any other stop (dead end, branching, label range, revisited
 * state, accept state with further transitions) yields null.
 *
 * @param a the automaton to inspect; must be deterministic
 * @return the labels collected along the walk, or null
 * @throws IllegalArgumentException if {@code a} is not deterministic
 */
public static IntsRef getSingleton(Automaton a) {
  if (!a.isDeterministic()) {
    throw new IllegalArgumentException("input automaton must be deterministic");
  }

  IntsRefBuilder path = new IntsRefBuilder();
  HashSet<Integer> seen = new HashSet<>();
  Transition edge = new Transition();
  int state = 0;

  while (true) {
    seen.add(state);

    if (a.isAccept(state)) {
      // An accept state ends the walk; further transitions would mean more inputs.
      return a.getNumTransitions(state) == 0 ? path.get() : null;
    }

    if (a.getNumTransitions(state) != 1) {
      // Dead end or branching.
      return null;
    }

    a.getTransition(state, 0, edge);
    if (edge.min != edge.max || seen.contains(edge.dest)) {
      // Label range or a revisited state.
      return null;
    }

    path.append(edge.min);
    state = edge.dest;
  }
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class BaseSynonymParserTestCase method assertEntryAbsent.
/**
 * Asserts that looking up the given word in the synonym map's FST yields no value.
 *
 * @param synonynMap the generated synonym map after parsing
 * @param word word (phrase) to look up. Should be the value that comes out of the analyzer.
 *             Every space is replaced by {@code SynonymMap.WORD_SEPARATOR} before the lookup.
 * @throws IOException declared for the FST lookup ({@code Util.get})
 */
public static void assertEntryAbsent(SynonymMap synonynMap, String word) throws IOException {
  final String key = word.replace(' ', SynonymMap.WORD_SEPARATOR);
  IntsRefBuilder scratch = new IntsRefBuilder();
  BytesRef value = Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(key), scratch));
  assertNull("There should be no synonyms for: " + key, value);
}
Aggregations