use of org.apache.lucene.util.automaton.Automaton in project elasticsearch by elastic.
the class TransportReindexAction method buildRemoteWhitelist.
/**
 * Compiles the reindex-from-remote whitelist into a {@link CharacterRunAutomaton} and refuses any whitelist that
 * would end up accepting every address.
 *
 * @param whitelist simple-match patterns describing the remote addresses that may be used
 * @return an automaton built from the patterns; an automaton built from {@code Automata.makeEmpty()} when
 *         {@code whitelist} is empty
 * @throws IllegalArgumentException if the minimized automaton is total, i.e. the whitelist accepts all addresses
 */
static CharacterRunAutomaton buildRemoteWhitelist(List<String> whitelist) {
    if (whitelist.isEmpty()) {
        // No patterns configured: hand back the empty automaton.
        return new CharacterRunAutomaton(Automata.makeEmpty());
    }

    final String[] patterns = whitelist.toArray(Strings.EMPTY_ARRAY);
    final Automaton minimized = MinimizationOperations.minimize(
        Regex.simpleMatchToAutomaton(patterns),
        Operations.DEFAULT_MAX_DETERMINIZED_STATES
    );

    if (Operations.isTotal(minimized) == false) {
        return new CharacterRunAutomaton(minimized);
    }

    // A total automaton would let users point reindex-from-remote at anything, so fail loudly.
    throw new IllegalArgumentException(
        "Refusing to start because whitelist " + whitelist + " accepts all addresses. "
            + "This would allow users to reindex-from-remote any URL they like effectively having Elasticsearch make HTTP GETs "
            + "for them."
    );
}
use of org.apache.lucene.util.automaton.Automaton in project lucene-solr by apache.
the class CompletionTokenStream method incrementToken.
/**
 * Advances to the next finite string of the automaton built from this stream and publishes it through the
 * term and payload attributes.
 *
 * @return {@code true} if a string was emitted, {@code false} once the iterator is exhausted
 * @throws IOException propagated from building the automaton
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();

    // Build the automaton and its iterator lazily, on the first call only.
    if (finiteStrings == null) {
        finiteStrings = new LimitedFiniteStringsIterator(toAutomaton(), maxGraphExpansions);
    }

    final IntsRef path = finiteStrings.next();
    final boolean hasToken = path != null;
    if (hasToken) {
        // now we have UTF-8
        Util.toBytesRef(path, bytesAtt.builder());

        if (charTermAttribute != null) {
            // Mirror the bytes into the char term attribute as UTF-16.
            charTermAttribute.setLength(0);
            charTermAttribute.append(bytesAtt.toUTF16());
        }

        if (payload != null) {
            payloadAttr.setPayload(this.payload);
        }
    }
    return hasToken;
}
use of org.apache.lucene.util.automaton.Automaton in project lucene-solr by apache.
the class CompletionTokenStream method toAutomaton.
/**
 * Converts the wrapped token stream into a determinized {@link Automaton}.
 * <p>
 * The input token stream is always closed (via {@code IOUtils.closeWhileHandlingException}) before this method
 * returns or throws.
 *
 * @param unicodeAware forwarded to {@code TokenStreamToAutomaton#setUnicodeArcs}
 * @return the automaton after separator replacement and determinization bounded by {@code maxGraphExpansions}
 * @throws IOException if converting the token stream fails
 */
public Automaton toAutomaton(boolean unicodeAware) throws IOException {
    // TODO refactor this
    // maybe we could hook up a modified automaton from TermAutomatonQuery here?
    final Automaton fromTokens;
    try {
        // Labels come from each analyzed token. When separators are preserved the escaping
        // converter is used with SEP_LABEL; otherwise the plain converter needs no escaping.
        final TokenStreamToAutomaton converter = preserveSep
            ? new EscapingTokenStreamToAutomaton((char) SEP_LABEL)
            : new TokenStreamToAutomaton();
        converter.setPreservePositionIncrements(preservePositionIncrements);
        converter.setUnicodeArcs(unicodeAware);
        fromTokens = converter.toAutomaton(inputTokenStream);
    } finally {
        IOUtils.closeWhileHandlingException(inputTokenStream);
    }

    // TODO: we can optimize this somewhat by determinizing
    // while we convert
    final Automaton withSeparators = replaceSep(fromTokens, preserveSep, SEP_LABEL);

    // This automaton should not blow up during determinize:
    return Operations.determinize(withSeparators, maxGraphExpansions);
}
use of org.apache.lucene.util.automaton.Automaton in project lucene-solr by apache.
the class ContextQuery method toContextAutomaton.
/**
 * Builds the automaton that matches the context prefix, terminated by
 * {@code ContextSuggestField.CONTEXT_SEPARATOR}.
 *
 * @param contexts         context values mapped to their metadata; a non-exact entry also matches any suffix
 * @param matchAllContexts when {@code true}, any context is accepted regardless of {@code contexts}
 * @return "any string + separator" when matching everything or when {@code contexts} is empty, otherwise the
 *         union of the per-context automata
 */
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
    final Automaton anyContext = Operations.repeat(Automata.makeAnyString());
    final Automaton separator = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);

    // Nothing specific to match: accept any context followed by the separator.
    if (matchAllContexts || contexts.isEmpty()) {
        return Operations.concatenate(anyContext, separator);
    }

    Automaton combined = null;
    for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
        final IntsRef value = entry.getKey();
        Automaton single = Automata.makeString(value.ints, value.offset, value.length);
        if (entry.getValue().exact == false) {
            // Non-exact contexts act as prefixes.
            single = Operations.concatenate(single, anyContext);
        }
        single = Operations.concatenate(single, separator);
        combined = (combined == null) ? single : Operations.union(combined, single);
    }
    return combined;
}
use of org.apache.lucene.util.automaton.Automaton in project lucene-solr by apache.
the class AnalyzingSuggester method toAutomaton.
/**
 * Analyzes a surface form with the index analyzer and turns the resulting token stream into an automaton.
 *
 * @param surfaceForm the UTF-8 encoded surface form to analyze
 * @param ts2a        the converter used to turn the analyzed token stream into an automaton
 * @return the automaton after {@code replaceSep} and {@code convertAutomaton} have been applied, in that order
 * @throws IOException if analysis or conversion fails
 */
final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
    // Analyze surface form; the token stream is closed as soon as the automaton has been built.
    final Automaton analyzed;
    try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) {
        // Labels of the automaton come from each analyzed token.
        analyzed = ts2a.toAutomaton(ts);
    }

    // Post-process: separator replacement first, then the subclass-specific conversion hook.
    return convertAutomaton(replaceSep(analyzed));
}
Aggregations