use of org.apache.lucene.util.automaton.Automaton in project lucene-solr by apache.
the class AnalyzingSuggester method replaceSep.
// Replaces SEP with epsilon or remaps them if
// we were asked to preserve them:
private Automaton replaceSep(Automaton a) {
int numStates = a.getNumStates();
Automaton.Builder result = new Automaton.Builder(numStates, a.getNumTransitions());
// Copy all states over
result.copyStates(a);
// Go in reverse topo sort so we know we only have to
// make one pass:
Transition t = new Transition();
int[] topoSortStates = Operations.topoSortStates(a);
for (int i = 0; i < topoSortStates.length; i++) {
int state = topoSortStates[topoSortStates.length - 1 - i];
int count = a.initTransition(state, t);
for (int j = 0; j < count; j++) {
a.getNextTransition(t);
if (t.min == TokenStreamToAutomaton.POS_SEP) {
assert t.max == TokenStreamToAutomaton.POS_SEP;
if (preserveSep) {
// Remap to SEP_LABEL:
result.addTransition(state, t.dest, SEP_LABEL);
} else {
result.addEpsilon(state, t.dest);
}
} else if (t.min == TokenStreamToAutomaton.HOLE) {
assert t.max == TokenStreamToAutomaton.HOLE;
// Just remove the hole: there will then be two
// SEP tokens next to each other, which will only
// match another hole at search time. Note that
// it will also match an empty-string token ... if
// that's somehow a problem we can always map HOLE
// to a dedicated byte (and escape it in the
// input).
result.addEpsilon(state, t.dest);
} else {
result.addTransition(state, t.dest, t.min, t.max);
}
}
}
return result.finish();
}
use of org.apache.lucene.util.automaton.Automaton in project lucene-solr by apache.
the class AnalyzingSuggester method toLookupAutomaton.
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
// TODO: is there a Reader from a CharSequence?
// Turn tokenstream into automaton:
Automaton automaton = null;
try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
automaton = getTokenStreamToAutomaton().toAutomaton(ts);
}
automaton = replaceSep(automaton);
// TODO: we can optimize this somewhat by determinizing
// while we convert
automaton = Operations.determinize(automaton, DEFAULT_MAX_DETERMINIZED_STATES);
return automaton;
}
use of org.apache.lucene.util.automaton.Automaton in project lucene-solr by apache.
the class ContextQuery method createWeight.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, needsScores, boost));
// if separators are preserved the fst contains a SEP_LABEL
// behind each gap. To have a matching automaton, we need to
// include the SEP_LABEL in the query as well
Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerWeight.getAutomaton());
Automaton contextsAutomaton = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton);
contextsAutomaton = Operations.determinize(contextsAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
final TreeSet<Integer> contextLengths = new TreeSet<>();
for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
ContextMetaData contextMetaData = entry.getValue();
contextMap.put(entry.getKey(), contextMetaData.boost);
contextLengths.add(entry.getKey().length);
}
int[] contextLengthArray = new int[contextLengths.size()];
final Iterator<Integer> iterator = contextLengths.descendingIterator();
for (int i = 0; iterator.hasNext(); i++) {
contextLengthArray[i] = iterator.next();
}
return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, contextLengthArray);
}
use of org.apache.lucene.util.automaton.Automaton in project lucene-solr by apache.
the class TestDuelingAnalyzers method setUp.
@Override
public void setUp() throws Exception {
super.setUp();
Automaton single = new Automaton();
int initial = single.createState();
int accept = single.createState();
single.setAccept(accept, true);
// build an automaton matching this jvm's letter definition
for (int i = 0; i <= 0x10FFFF; i++) {
if (Character.isLetter(i)) {
single.addTransition(initial, accept, i);
}
}
Automaton repeat = Operations.repeat(single);
jvmLetter = new CharacterRunAutomaton(repeat);
}
use of org.apache.lucene.util.automaton.Automaton in project lucene-solr by apache.
the class TestSynonymGraphFilter method toAutomaton.
private Automaton toAutomaton(TokenStream ts) throws IOException {
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
Automaton a = new Automaton();
int srcNode = -1;
int destNode = -1;
int state = a.createState();
while (ts.incrementToken()) {
assert termAtt.length() == 1;
char c = termAtt.charAt(0);
int posInc = posIncAtt.getPositionIncrement();
if (posInc != 0) {
srcNode += posInc;
while (state < srcNode) {
state = a.createState();
}
}
destNode = srcNode + posLenAtt.getPositionLength();
while (state < destNode) {
state = a.createState();
}
a.addTransition(srcNode, destNode, c);
}
ts.end();
ts.close();
a.finishState();
a.setAccept(destNode, true);
return a;
}
Aggregations