use of org.apache.lucene.util.automaton.Transition in project lucene-solr by apache.
the class TermAutomatonQuery method finish.
/**
* Call this once you are done adding states/transitions.
* @param maxDeterminizedStates Maximum number of states created when
* determinizing the automaton. Higher numbers allow this operation to
* consume more memory but allow more complex automatons.
*/
public void finish(int maxDeterminizedStates) {
Automaton automaton = builder.finish();
// System.out.println("before det:\n" + automaton.toDot());
Transition t = new Transition();
if (anyTermID != -1) {
// Make sure there are no leading or trailing ANY:
int count = automaton.initTransition(0, t);
for (int i = 0; i < count; i++) {
automaton.getNextTransition(t);
if (anyTermID >= t.min && anyTermID <= t.max) {
throw new IllegalStateException("automaton cannot lead with an ANY transition");
}
}
int numStates = automaton.getNumStates();
for (int i = 0; i < numStates; i++) {
count = automaton.initTransition(i, t);
for (int j = 0; j < count; j++) {
automaton.getNextTransition(t);
if (automaton.isAccept(t.dest) && anyTermID >= t.min && anyTermID <= t.max) {
throw new IllegalStateException("automaton cannot end with an ANY transition");
}
}
}
int termCount = termToID.size();
// We have to carefully translate these transitions so automaton
// realizes they also match all other terms:
Automaton newAutomaton = new Automaton();
for (int i = 0; i < numStates; i++) {
newAutomaton.createState();
newAutomaton.setAccept(i, automaton.isAccept(i));
}
for (int i = 0; i < numStates; i++) {
count = automaton.initTransition(i, t);
for (int j = 0; j < count; j++) {
automaton.getNextTransition(t);
int min, max;
if (t.min <= anyTermID && anyTermID <= t.max) {
// Match any term
min = 0;
max = termCount - 1;
} else {
min = t.min;
max = t.max;
}
newAutomaton.addTransition(t.source, t.dest, min, max);
}
}
newAutomaton.finishState();
automaton = newAutomaton;
}
det = Operations.removeDeadStates(Operations.determinize(automaton, maxDeterminizedStates));
if (det.isAccept(0)) {
throw new IllegalStateException("cannot accept the empty string");
}
}
use of org.apache.lucene.util.automaton.Transition in project lucene-solr by apache.
the class FSTUtil method intersectPrefixPaths.
/**
* Enumerates all minimal prefix paths in the automaton that also intersect the FST,
* accumulating the FST end node and output for each path.
*/
public static <T> List<Path<T>> intersectPrefixPaths(Automaton a, FST<T> fst) throws IOException {
assert a.isDeterministic();
final List<Path<T>> queue = new ArrayList<>();
final List<Path<T>> endNodes = new ArrayList<>();
if (a.getNumStates() == 0) {
return endNodes;
}
queue.add(new Path<>(0, fst.getFirstArc(new FST.Arc<T>()), fst.outputs.getNoOutput(), new IntsRefBuilder()));
final FST.Arc<T> scratchArc = new FST.Arc<>();
final FST.BytesReader fstReader = fst.getBytesReader();
Transition t = new Transition();
while (queue.size() != 0) {
final Path<T> path = queue.remove(queue.size() - 1);
if (a.isAccept(path.state)) {
endNodes.add(path);
// we accept all further paths too
continue;
}
IntsRefBuilder currentInput = path.input;
int count = a.initTransition(path.state, t);
for (int i = 0; i < count; i++) {
a.getNextTransition(t);
final int min = t.min;
final int max = t.max;
if (min == max) {
final FST.Arc<T> nextArc = fst.findTargetArc(t.min, path.fstNode, scratchArc, fstReader);
if (nextArc != null) {
final IntsRefBuilder newInput = new IntsRefBuilder();
newInput.copyInts(currentInput.get());
newInput.append(t.min);
queue.add(new Path<>(t.dest, new FST.Arc<T>().copyFrom(nextArc), fst.outputs.add(path.output, nextArc.output), newInput));
}
} else {
// TODO: if this transition's TO state is accepting, and
// it accepts the entire range possible in the FST (ie. 0 to 255),
// we can simply use the prefix as the accepted state instead of
// looking up all the ranges and terminate early
// here. This just shifts the work from one queue
// (this one) to another (the completion search
// done in AnalyzingSuggester).
FST.Arc<T> nextArc = Util.readCeilArc(min, fst, path.fstNode, scratchArc, fstReader);
while (nextArc != null && nextArc.label <= max) {
assert nextArc.label <= max;
assert nextArc.label >= min : nextArc.label + " " + min;
final IntsRefBuilder newInput = new IntsRefBuilder();
newInput.copyInts(currentInput.get());
newInput.append(nextArc.label);
queue.add(new Path<>(t.dest, new FST.Arc<T>().copyFrom(nextArc), fst.outputs.add(path.output, nextArc.output), newInput));
// used in assert
final int label = nextArc.label;
nextArc = nextArc.isLast() ? null : fst.readNextRealArc(nextArc, fstReader);
assert nextArc == null || label < nextArc.label : "last: " + label + " next: " + nextArc.label;
}
}
}
}
return endNodes;
}
use of org.apache.lucene.util.automaton.Transition in project lucene-solr by apache.
the class CompletionTokenStream method replaceSep.
// Replaces SEP with epsilon or remaps them if
// we were asked to preserve them:
private static Automaton replaceSep(Automaton a, boolean preserveSep, int sepLabel) {
Automaton result = new Automaton();
// Copy all states over
int numStates = a.getNumStates();
for (int s = 0; s < numStates; s++) {
result.createState();
result.setAccept(s, a.isAccept(s));
}
// Go in reverse topo sort so we know we only have to
// make one pass:
Transition t = new Transition();
int[] topoSortStates = Operations.topoSortStates(a);
for (int i = 0; i < topoSortStates.length; i++) {
int state = topoSortStates[topoSortStates.length - 1 - i];
int count = a.initTransition(state, t);
for (int j = 0; j < count; j++) {
a.getNextTransition(t);
if (t.min == TokenStreamToAutomaton.POS_SEP) {
assert t.max == TokenStreamToAutomaton.POS_SEP;
if (preserveSep) {
// Remap to SEP_LABEL:
result.addTransition(state, t.dest, sepLabel);
} else {
result.addEpsilon(state, t.dest);
}
} else if (t.min == TokenStreamToAutomaton.HOLE) {
assert t.max == TokenStreamToAutomaton.HOLE;
// Just remove the hole: there will then be two
// SEP tokens next to each other, which will only
// match another hole at search time. Note that
// it will also match an empty-string token ... if
// that's somehow a problem we can always map HOLE
// to a dedicated byte (and escape it in the
// input).
result.addEpsilon(state, t.dest);
} else {
result.addTransition(state, t.dest, t.min, t.max);
}
}
}
result.finishState();
return result;
}
use of org.apache.lucene.util.automaton.Transition in project elasticsearch by elastic.
the class XAnalyzingSuggester method replaceSep.
// Replaces SEP with epsilon or remaps them if
// we were asked to preserve them:
private Automaton replaceSep(Automaton a) {
Automaton result = new Automaton();
// Copy all states over
int numStates = a.getNumStates();
for (int s = 0; s < numStates; s++) {
result.createState();
result.setAccept(s, a.isAccept(s));
}
// Go in reverse topo sort so we know we only have to
// make one pass:
Transition t = new Transition();
int[] topoSortStates = topoSortStates(a);
for (int i = 0; i < topoSortStates.length; i++) {
int state = topoSortStates[topoSortStates.length - 1 - i];
int count = a.initTransition(state, t);
for (int j = 0; j < count; j++) {
a.getNextTransition(t);
if (t.min == TokenStreamToAutomaton.POS_SEP) {
assert t.max == TokenStreamToAutomaton.POS_SEP;
if (preserveSep) {
// Remap to SEP_LABEL:
result.addTransition(state, t.dest, SEP_LABEL);
} else {
result.addEpsilon(state, t.dest);
}
} else if (t.min == TokenStreamToAutomaton.HOLE) {
assert t.max == TokenStreamToAutomaton.HOLE;
// Just remove the hole: there will then be two
// SEP tokens next to each other, which will only
// match another hole at search time. Note that
// it will also match an empty-string token ... if
// that's somehow a problem we can always map HOLE
// to a dedicated byte (and escape it in the
// input).
result.addEpsilon(state, t.dest);
} else {
result.addTransition(state, t.dest, t.min, t.max);
}
}
}
result.finishState();
return result;
}
use of org.apache.lucene.util.automaton.Transition in project elasticsearch by elastic.
the class XAnalyzingSuggester method topoSortStates.
private int[] topoSortStates(Automaton a) {
int[] states = new int[a.getNumStates()];
final Set<Integer> visited = new HashSet<>();
final LinkedList<Integer> worklist = new LinkedList<>();
worklist.add(0);
visited.add(0);
int upto = 0;
states[upto] = 0;
upto++;
Transition t = new Transition();
while (worklist.size() > 0) {
int s = worklist.removeFirst();
int count = a.initTransition(s, t);
for (int i = 0; i < count; i++) {
a.getNextTransition(t);
if (!visited.contains(t.dest)) {
visited.add(t.dest);
worklist.add(t.dest);
states[upto++] = t.dest;
}
}
}
return states;
}
Aggregations