Use of org.apache.lucene.util.automaton.TooComplexToDeterminizeException in the elasticsearch project by elastic.
From the class QueryStringQueryBuilderTests, method testToQueryRegExpQueryTooComplex.
/**
 * Checks that converting a query string containing a very large regexp to a query fails with
 * {@link TooComplexToDeterminizeException}, and that the failure message names the regexp being
 * determinized as well as the 10000-state limit that was exceeded.
 */
public void testToQueryRegExpQueryTooComplex() throws Exception {
    assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

    final String hugeRegexp = "/[ac]*a[ac]{50,200}/";
    QueryStringQueryBuilder tooComplexQuery = queryStringQuery(hugeRegexp).defaultField(STRING_FIELD_NAME);

    // The shard context is created inside the lambda so that it is only built when the conversion runs.
    TooComplexToDeterminizeException failure = expectThrows(
        TooComplexToDeterminizeException.class,
        () -> tooComplexQuery.toQuery(createShardContext()));

    String message = failure.getMessage();
    assertThat(message, containsString("Determinizing [ac]*"));
    assertThat(message, containsString("would result in more than 10000 states"));
}
Use of org.apache.lucene.util.automaton.TooComplexToDeterminizeException in the lucene-solr project by apache.
From the class TestSynonymGraphFilter, method testRandomSyns.
/**
 * Randomized comparison of the synonym graph analysis chain against {@code slowSynFilter}.
 *
 * <p>The test builds a random set of synonym rules (random a / b input tokens mapped to random
 * x / y output tokens), then analyzes a number of random documents, optionally through the
 * flattening analyzer. For every document the produced token graph is converted to an automaton
 * and compared with the automaton produced by {@code slowSynFilter}. The lookahead reported by
 * the synonym filter (and by the flatten filter, when enabled) is also asserted to stay within
 * the bounds computed from the generated rules.
 *
 * <p>Both analyzers are managed by try-with-resources so they are closed even when an assertion
 * fails part-way through the iterations.
 */
public void testRandomSyns() throws Exception {
  int synCount = atLeast(10);
  double bias = random().nextDouble();
  boolean dedup = random().nextBoolean();
  boolean flatten = random().nextBoolean();

  SynonymMap.Builder b = new SynonymMap.Builder(dedup);
  List<OneSyn> syns = new ArrayList<>();

  // Makes random syns from random a / b tokens, mapping to random x / y tokens
  if (VERBOSE) {
    System.out.println("TEST: make " + synCount + " syns");
    System.out.println(" bias for a over b=" + bias);
    System.out.println(" dedup=" + dedup);
    System.out.println(" flatten=" + flatten);
  }

  // Longest synonym input seen; used as the upper bound for the synonym filter's lookahead.
  int maxSynLength = 0;
  for (int i = 0; i < synCount; i++) {
    OneSyn syn = new OneSyn();
    syn.in = randomBinaryChars(1, 5, bias, 'a');
    syn.out = randomBinaryChars(1, 5, 0.5, 'x');
    syn.keepOrig = random().nextBoolean();
    syns.add(syn);
    maxSynLength = Math.max(maxSynLength, syn.in.length);
    if (VERBOSE) {
      System.out.println(" " + syn);
    }
    add(b, toTokenString(syn.in), toTokenString(syn.out), syn.keepOrig);
  }

  // Compute max allowed lookahead for flatten filter:
  int maxFlattenLookahead = 0;
  if (flatten) {
    for (int i = 0; i < synCount; i++) {
      OneSyn syn1 = syns.get(i);
      int count = syn1.out.length;
      boolean keepOrig = syn1.keepOrig;
      for (int j = 0; j < synCount; j++) {
        // NOTE(review): this reads index i, not j, so syn2 is always syn1 and the inner loop only
        // adds syn1.out.length synCount times. It looks like syns.get(j) was intended. Separately,
        // `in` is an array (it is read via `.length`), so equals() below is reference equality and
        // would not match two different rules with the same input even with the index corrected.
        // Deliberately left as-is: correcting either point tightens the bound asserted further
        // down, which has to be validated by running this randomized test -- TODO confirm.
        OneSyn syn2 = syns.get(i);
        keepOrig |= syn2.keepOrig;
        if (syn1.in.equals(syn2.in)) {
          count += syn2.out.length;
        }
      }
      if (keepOrig) {
        count += syn1.in.length;
      }
      maxFlattenLookahead = Math.max(maxFlattenLookahead, count);
    }
  }

  // aNoFlattened is only used w/ VERBOSE and stays null otherwise (try-with-resources skips null
  // resources). The analyzers are created in the same order as before: un-flattened first, then
  // the analyzer under test.
  try (Analyzer aNoFlattened = VERBOSE ? getAnalyzer(b, true) : null;
      Analyzer a = flatten ? getFlattenAnalyzer(b, true) : getAnalyzer(b, true)) {

    int iters = atLeast(20);
    for (int iter = 0; iter < iters; iter++) {
      String doc = toTokenString(randomBinaryChars(50, 100, bias, 'a'));
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter + " doc=" + doc);
      }

      Automaton expected = slowSynFilter(doc, syns, flatten);
      if (VERBOSE) {
        System.out.println(" expected:\n" + expected.toDot());
        if (flatten) {
          Automaton unflattened = toAutomaton(aNoFlattened.tokenStream("field", new StringReader(doc)));
          System.out.println(" actual unflattened:\n" + unflattened.toDot());
        }
      }

      Automaton actual = toAutomaton(a.tokenStream("field", new StringReader(doc)));
      if (VERBOSE) {
        System.out.println(" actual:\n" + actual.toDot());
      }

      assertTrue("maxLookaheadUsed=" + synFilter.getMaxLookaheadUsed() + " maxSynLength=" + maxSynLength,
          synFilter.getMaxLookaheadUsed() <= maxSynLength);
      if (flatten) {
        assertTrue("flatten maxLookaheadUsed=" + flattenFilter.getMaxLookaheadUsed() + " maxFlattenLookahead=" + maxFlattenLookahead,
            flattenFilter.getMaxLookaheadUsed() <= maxFlattenLookahead);
      }

      checkAnalysisConsistency(random(), a, random().nextBoolean(), doc);

      // The automaton may be non-deterministic at this point, e.g. if more than one syn matched
      // at a given position, or if a syn mapped to an output token that also happens to be in
      // the input. Determinize both sides before the exact language comparison, and fall back to
      // the approximate comparison alone when determinization exceeds the state limit.
      try {
        actual = Operations.determinize(actual, 50000);
      } catch (TooComplexToDeterminizeException ignored) {
        // Unfortunately the syns can easily create difficult-to-determinize graphs:
        assertTrue(approxEquals(actual, expected));
        continue;
      }

      try {
        expected = Operations.determinize(expected, 50000);
      } catch (TooComplexToDeterminizeException ignored) {
        // Unfortunately the syns can easily create difficult-to-determinize graphs:
        assertTrue(approxEquals(actual, expected));
        continue;
      }

      assertTrue(approxEquals(actual, expected));
      assertTrue(Operations.sameLanguage(actual, expected));
    }
  }
}
Use of org.apache.lucene.util.automaton.TooComplexToDeterminizeException in the elasticsearch project by elastic.
From the class QueryStringQueryBuilderTests, method testToQueryRegExpQueryMaxDeterminizedStatesParsing.
/**
 * Checks that {@code max_determinized_states} is picked up when a {@code query_string} query is
 * parsed from JSON: with the limit set to 10, converting the parsed regexp query is expected to
 * fail with {@link TooComplexToDeterminizeException} reporting that 10-state limit.
 */
public void testToQueryRegExpQueryMaxDeterminizedStatesParsing() throws Exception {
    assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

    // Indentation of the chain mirrors the nesting of the JSON document being built.
    XContentBuilder json = JsonXContent.contentBuilder()
        .startObject()
            .startObject("query_string")
                .field("query", "/[ac]*a[ac]{1,10}/")
                .field("default_field", STRING_FIELD_NAME)
                .field("max_determinized_states", 10)
            .endObject()
        .endObject();

    QueryBuilder parsedQuery = new QueryParseContext(createParser(json)).parseInnerQueryBuilder();

    TooComplexToDeterminizeException failure = expectThrows(
        TooComplexToDeterminizeException.class,
        () -> parsedQuery.toQuery(createShardContext()));

    String message = failure.getMessage();
    assertThat(message, containsString("Determinizing [ac]*"));
    assertThat(message, containsString("would result in more than 10 states"));
}
Aggregations