Search in sources :

Example 1 with TooComplexToDeterminizeException

use of org.apache.lucene.util.automaton.TooComplexToDeterminizeException in project elasticsearch by elastic.

the class QueryStringQueryBuilderTests method testToQueryRegExpQueryTooComplex.

/**
 * Checks that converting a query string containing the regexp
 * {@code /[ac]*a[ac]{50,200}/} to a query fails with a
 * {@link TooComplexToDeterminizeException} whose message names the regexp
 * prefix and a limit of 10000 states.
 */
public void testToQueryRegExpQueryTooComplex() throws Exception {
    assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
    final QueryStringQueryBuilder regexpQuery = queryStringQuery("/[ac]*a[ac]{50,200}/").defaultField(STRING_FIELD_NAME);
    final TooComplexToDeterminizeException thrown = expectThrows(
        TooComplexToDeterminizeException.class,
        () -> regexpQuery.toQuery(createShardContext()));
    // Read the message once and check both expected fragments against it.
    final String message = thrown.getMessage();
    assertThat(message, containsString("Determinizing [ac]*"));
    assertThat(message, containsString("would result in more than 10000 states"));
}
Also used : TooComplexToDeterminizeException(org.apache.lucene.util.automaton.TooComplexToDeterminizeException)

Example 2 with TooComplexToDeterminizeException

use of org.apache.lucene.util.automaton.TooComplexToDeterminizeException in project lucene-solr by apache.

the class TestSynonymGraphFilter method testRandomSyns.

/**
 * Randomized test: builds a random set of synonym rules (inputs made of random
 * a / b tokens, outputs made of random x / y tokens), then for a number of
 * random documents compares the automaton produced by the analyzer under test
 * against the automaton produced by {@code slowSynFilter}, and checks the
 * lookahead reported by the synonym (and, when enabled, flatten) filter against
 * computed upper bounds.
 *
 * @throws Exception if analysis or any of the helper calls fails
 */
public void testRandomSyns() throws Exception {
    int synCount = atLeast(10);
    double bias = random().nextDouble();
    boolean dedup = random().nextBoolean();
    boolean flatten = random().nextBoolean();
    SynonymMap.Builder b = new SynonymMap.Builder(dedup);
    List<OneSyn> syns = new ArrayList<>();
    // Makes random syns from random a / b tokens, mapping to random x / y tokens
    if (VERBOSE) {
        System.out.println("TEST: make " + synCount + " syns");
        System.out.println("  bias for a over b=" + bias);
        System.out.println("  dedup=" + dedup);
        System.out.println("  flatten=" + flatten);
    }
    int maxSynLength = 0;
    for (int i = 0; i < synCount; i++) {
        OneSyn syn = new OneSyn();
        syn.in = randomBinaryChars(1, 5, bias, 'a');
        syn.out = randomBinaryChars(1, 5, 0.5, 'x');
        syn.keepOrig = random().nextBoolean();
        syns.add(syn);
        maxSynLength = Math.max(maxSynLength, syn.in.length);
        if (VERBOSE) {
            System.out.println("  " + syn);
        }
        add(b, toTokenString(syn.in), toTokenString(syn.out), syn.keepOrig);
    }
    // Compute max allowed lookahead for flatten filter:
    int maxFlattenLookahead = 0;
    if (flatten) {
        for (int i = 0; i < synCount; i++) {
            OneSyn syn1 = syns.get(i);
            int count = syn1.out.length;
            boolean keepOrig = syn1.keepOrig;
            for (int j = 0; j < synCount; j++) {
                // NOTE(review): this reads index i, not j, so syn2 is always syn1, the
                // equals() check below is always true, and count grows by syn1.out.length
                // on every inner iteration. It looks like syns.get(j) plus a content
                // comparison of the `in` values (which appear to be arrays) was intended,
                // but that would tighten the bound asserted further down; left unchanged
                // until the tighter bound can be confirmed by running the test.
                OneSyn syn2 = syns.get(i);
                keepOrig |= syn2.keepOrig;
                if (syn1.in.equals(syn2.in)) {
                    count += syn2.out.length;
                }
            }
            if (keepOrig) {
                count += syn1.in.length;
            }
            maxFlattenLookahead = Math.max(maxFlattenLookahead, count);
        }
    }
    // Both analyzers are managed by try-with-resources so they are closed even when
    // an assertion fails. aNoFlattened is only used w/ VERBOSE and is null otherwise
    // (null resources are skipped on close). Creation order matches the original:
    // the unflattened analyzer first, then the analyzer under test.
    try (Analyzer aNoFlattened = VERBOSE ? getAnalyzer(b, true) : null;
         Analyzer a = flatten ? getFlattenAnalyzer(b, true) : getAnalyzer(b, true)) {
        int iters = atLeast(20);
        for (int iter = 0; iter < iters; iter++) {
            String doc = toTokenString(randomBinaryChars(50, 100, bias, 'a'));
            if (VERBOSE) {
                System.out.println("TEST: iter=" + iter + " doc=" + doc);
            }
            Automaton expected = slowSynFilter(doc, syns, flatten);
            if (VERBOSE) {
                System.out.println("  expected:\n" + expected.toDot());
                if (flatten) {
                    Automaton unflattened = toAutomaton(aNoFlattened.tokenStream("field", new StringReader(doc)));
                    System.out.println("  actual unflattened:\n" + unflattened.toDot());
                }
            }
            Automaton actual = toAutomaton(a.tokenStream("field", new StringReader(doc)));
            if (VERBOSE) {
                System.out.println("  actual:\n" + actual.toDot());
            }
            assertTrue("maxLookaheadUsed=" + synFilter.getMaxLookaheadUsed() + " maxSynLength=" + maxSynLength, synFilter.getMaxLookaheadUsed() <= maxSynLength);
            if (flatten) {
                assertTrue("flatten maxLookaheadUsed=" + flattenFilter.getMaxLookaheadUsed() + " maxFlattenLookahead=" + maxFlattenLookahead, flattenFilter.getMaxLookaheadUsed() <= maxFlattenLookahead);
            }
            checkAnalysisConsistency(random(), a, random().nextBoolean(), doc);
            // Determinize both automata before the exact sameLanguage comparison below.
            // NOTE(review): the original comment here was truncated; it ended with
            // "output token that also happens to be in the input:".
            try {
                actual = Operations.determinize(actual, 50000);
            } catch (TooComplexToDeterminizeException tctde) {
                // Unfortunately the syns can easily create difficult-to-determinize graphs:
                // fall back to the approximate comparison only.
                assertTrue(approxEquals(actual, expected));
                continue;
            }
            try {
                expected = Operations.determinize(expected, 50000);
            } catch (TooComplexToDeterminizeException tctde) {
                // Unfortunately the syns can easily create difficult-to-determinize graphs:
                // fall back to the approximate comparison only.
                assertTrue(approxEquals(actual, expected));
                continue;
            }
            assertTrue(approxEquals(actual, expected));
            assertTrue(Operations.sameLanguage(actual, expected));
        }
    }
}
Also used : TooComplexToDeterminizeException(org.apache.lucene.util.automaton.TooComplexToDeterminizeException) Automaton(org.apache.lucene.util.automaton.Automaton) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringReader(java.io.StringReader)

Example 3 with TooComplexToDeterminizeException

use of org.apache.lucene.util.automaton.TooComplexToDeterminizeException in project elasticsearch by elastic.

the class QueryStringQueryBuilderTests method testToQueryRegExpQueryMaxDeterminizedStatesParsing.

/**
 * Builds a {@code query_string} query as JSON with {@code max_determinized_states}
 * set to 10, parses it, and checks that converting the parsed builder to a query
 * fails with a {@link TooComplexToDeterminizeException} whose message reports the
 * 10-state limit.
 */
public void testToQueryRegExpQueryMaxDeterminizedStatesParsing() throws Exception {
    assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

    // Outer object wrapping a single "query_string" object.
    final XContentBuilder json = JsonXContent.contentBuilder();
    json.startObject();
    json.startObject("query_string");
    json.field("query", "/[ac]*a[ac]{1,10}/");
    json.field("default_field", STRING_FIELD_NAME);
    json.field("max_determinized_states", 10);
    json.endObject();
    json.endObject();

    final QueryBuilder parsedQuery = new QueryParseContext(createParser(json)).parseInnerQueryBuilder();
    final TooComplexToDeterminizeException thrown = expectThrows(
        TooComplexToDeterminizeException.class,
        () -> parsedQuery.toQuery(createShardContext()));

    // Read the message once and check both expected fragments against it.
    final String message = thrown.getMessage();
    assertThat(message, containsString("Determinizing [ac]*"));
    assertThat(message, containsString("would result in more than 10 states"));
}
Also used : TooComplexToDeterminizeException(org.apache.lucene.util.automaton.TooComplexToDeterminizeException) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder)

Aggregations

TooComplexToDeterminizeException (org.apache.lucene.util.automaton.TooComplexToDeterminizeException): 3, StringReader (java.io.StringReader): 1, ArrayList (java.util.ArrayList): 1, Analyzer (org.apache.lucene.analysis.Analyzer): 1, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 1, CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder): 1, IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder): 1, Automaton (org.apache.lucene.util.automaton.Automaton): 1, XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder): 1