Search in sources :

Example 41 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestRepetitionIndex method fuzzingRepetitionBug.

/**
 * Fuzzes small random corpora to hunt for unexpected, failing
 * constellations of the repetition query {@code b c{2,3} d}.
 *
 * <p>Every trial builds a fresh index of random documents over the
 * alphabet {@code a, b, c, c, d, e} ("c" is listed twice), counts
 * the occurrences of the regular expression {@code bccc?d} in the
 * document strings and compares this count with the total number
 * of results the query yields on the index. A mismatch whose
 * description is shorter than the last reported one is printed to
 * {@code System.err} as {@code <regexCount>:<queryCount> [docs]},
 * and the generator bounds are tightened to steer the following
 * trials towards less complex constellations.
 *
 * <p>This method carries no {@code @Test} annotation; presumably it
 * is meant to be invoked manually while debugging.
 *
 * @throws IOException
 *             declared for the index operations (add, commit, close)
 * @throws QueryException
 *             declared for the construction of the span query
 */
public void fuzzingRepetitionBug() throws IOException, QueryException {
    List<String> chars = Arrays.asList("a", "b", "c", "c", "d", "e");
    // Reference oracle: the query expressed as a regular expression
    Pattern p = Pattern.compile("bccc?d");
    QueryBuilder qb = new QueryBuilder("base");
    // b c{2,3} d
    SpanQuery sq = qb.seq(qb.seg("s:b")).append(qb.repeat(qb.seg("s:c"), 2, 3)).append(qb.seg("s:d")).toQuery();
    Krill ks = new Krill(sq);
    // JUnit expects (expected, actual)
    assertEquals("spanNext(spanNext(base:s:b, spanRepetition(base:s:c{2,3})), base:s:d)", ks.getSpanQuery().toString());
    String lastFailureConf = "";
    int minLength = 6;
    int maxLength = 22;
    int maxDocs = 8;
    // Create fuzzy corpora (100000 trials)
    for (int x = 0; x < 100000; x++) {
        KrillIndex ki = new KrillIndex();
        try {
            List<String> list = new ArrayList<String>();
            int c = 0;
            // Create a corpus of fewer than maxDocs fuzzy docs; the
            // random bound is drawn anew on every loop check
            for (int i = 0; i < (int) (Math.random() * maxDocs); i++) {
                FieldDocument testDoc = simpleFuzzyFieldDoc(chars, minLength, maxLength);
                String testString = testDoc.doc.getField("base").stringValue();
                Matcher m = p.matcher(testString);
                list.add(testString);
                // Count the matches the regular expression finds
                while (m.find()) {
                    c++;
                }
                ki.addDoc(testDoc);
            }
            ki.commit();
            Result kr = ks.apply(ki);
            // spit out the corpus configurations
            if (c != kr.getTotalResults()) {
                String failureConf = c + ":" + kr.getTotalResults() + " " + list.toString();
                if (lastFailureConf.length() == 0 || failureConf.length() < lastFailureConf.length()) {
                    System.err.println(failureConf);
                    lastFailureConf = failureConf;
                    // Shrink the bounds, but keep them usable.
                    // NOTE(review): the lower bound of 1 for minLength
                    // assumes simpleFuzzyFieldDoc wants a positive
                    // length - confirm against the helper.
                    if (minLength > 1) {
                        minLength--;
                    }
                    // With maxDocs <= 1 the doc loop above would never
                    // run and all remaining trials would be vacuous
                    if (maxDocs > 2) {
                        maxDocs--;
                    }
                }
            }
        }
        finally {
            // Release the per-trial index instead of leaking 100000 of them
            ki.close();
        }
    }
}
Also used : TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Krill(de.ids_mannheim.korap.Krill)

Example 42 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestRepetitionIndex method testRepetitionInSequences.

/**
 * Checks a bounded repetition ({@code c{1,2}}) used as an operand of
 * sequence queries: first {@code e c{1,2}} on a single document,
 * then {@code e c{1,2} d} on the same index, and finally
 * {@code e c{1,2} d} again after a second document was added.
 *
 * @throws IOException
 *             declared for the index operations
 */
@Test
public void testRepetitionInSequences() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();

    // e c{1,2}
    SpanTermQuery termE = new SpanTermQuery(new Term("base", "s:e"));
    SpanRepetitionQuery oneOrTwoC = new SpanRepetitionQuery(new SpanTermQuery(new Term("base", "s:c")), 1, 2, true);
    SpanQuery eThenC = new SpanNextQuery(termE, oneOrTwoC);
    kr = ki.search(eThenC, (short) 10);

    // Expected spans as {start, end}: 1-3, 1-4, 4-6
    int[][] eThenCSpans = { { 1, 3 }, { 1, 4 }, { 4, 6 } };
    assertEquals(3L, kr.getTotalResults());
    for (int i = 0; i < eThenCSpans.length; i++) {
        assertEquals(eThenCSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(eThenCSpans[i][1], kr.getMatch(i).getEndPos());
    }

    // e c{1,2} d
    SpanTermQuery termD = new SpanTermQuery(new Term("base", "s:d"));
    SpanQuery eThenCThenD = new SpanNextQuery(eThenC, termD);
    kr = ki.search(eThenCThenD, (short) 10);

    // Expected spans as {start, end}: 1-5, 4-7
    int[][] eThenCThenDSpans = { { 1, 5 }, { 4, 7 } };
    assertEquals(2L, kr.getTotalResults());
    for (int i = 0; i < eThenCThenDSpans.length; i++) {
        assertEquals(eThenCThenDSpans[i][0], kr.getMatch(i).startPos);
        assertEquals(eThenCThenDSpans[i][1], kr.getMatch(i).endPos);
    }

    // Multiple documents: the same query after adding a second doc
    ki.addDoc(createFieldDoc1());
    ki.commit();
    kr = ki.search(eThenCThenD, (short) 10);
    assertEquals(5L, kr.getTotalResults());
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Example 43 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestRepetitionIndex method testCase2.

/**
 * Skip to.
 *
 * <p>Indexes four documents (in the order 0, 3, 2, 1) and searches
 * for {@code e c{2,2}}, i.e. an "e" directly followed by exactly
 * two "c". Two results are expected, the second one in the document
 * with the local id 3.
 * NOTE(review): the original title suggests this targets the
 * skip-to behaviour of the repetition spans across documents -
 * confirm.
 *
 * @throws IOException
 *             declared for the index operations
 */
@Test
public void testCase2() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.addDoc(createFieldDoc3());
    ki.addDoc(createFieldDoc2());
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // Disabled check kept for reference: a plain c{2,2}
    // (SpanRepetitionQuery on "s:c" with 2, 2, true) was expected to
    // yield 6 results, with 2-4, 3-5 and 4-6 in doc1.

    // e c{2,2}
    SpanTermQuery termE = new SpanTermQuery(new Term("base", "s:e"));
    SpanRepetitionQuery exactlyTwoC = new SpanRepetitionQuery(new SpanTermQuery(new Term("base", "s:c")), 2, 2, true);
    SpanQuery eThenTwoC = new SpanNextQuery(termE, exactlyTwoC);

    kr = ki.search(eThenTwoC, (short) 10);
    assertEquals(2L, kr.getTotalResults());
    assertEquals(3, kr.getMatch(1).getLocalDocID());
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Example 44 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestRepetitionIndex method testRepetitionSnippetBug1.

/**
 * Regression test for the repetition query {@code b c{2,3} d},
 * replaying a simple document and two corpora recorded from
 * {@code fuzzingRepetitionBug()} failures.
 *
 * <p>The query corresponds to the regular expression
 * {@code bccc?d} on the document strings; it has the same shape as
 * a query like {@code Der [corenlp/p=ADJA]{2,3} Baum}.
 *
 * @throws IOException
 *             declared for the index operations (add, commit, close)
 * @throws QueryException
 *             declared for the construction of the span query
 */
@Test
public void testRepetitionSnippetBug1() throws IOException, QueryException {
    QueryBuilder qb = new QueryBuilder("base");
    // b c{2,3} d
    SpanQuery sq = qb.seq(qb.seg("s:b")).append(qb.repeat(qb.seg("s:c"), 2, 3)).append(qb.seg("s:d")).toQuery();
    Krill ks = new Krill(sq);
    // JUnit expects (expected, actual)
    assertEquals("spanNext(spanNext(base:s:b, spanRepetition(base:s:c{2,3})), base:s:d)", ks.getSpanQuery().toString());

    // simpleDocTest
    KrillIndex ki = new KrillIndex();
    ki.addDoc(simpleFieldDoc("abccde"));
    ki.commit();
    Result kr = ks.apply(ki);
    // The result is evaluated after the index was released
    ki.close();
    assertEquals(1, kr.getTotalResults());

    // fuzzingRepetitionBug();

    // First fuzzed failure (0 vs 1)
    ki = new KrillIndex();
    // 0
    ki.addDoc(simpleFieldDoc("cccd"));
    // 1
    ki.addDoc(simpleFieldDoc("bccccccaeae"));
    // 2
    ki.addDoc(simpleFieldDoc("cbcedb"));
    ki.commit();
    kr = ks.apply(ki);
    ki.close();
    assertEquals(0, kr.getTotalResults());

    // Third fuzzed failure (1 vs 2)
    ki = new KrillIndex();
    ki.addDoc(simpleFieldDoc("bccdcb"));
    ki.addDoc(simpleFieldDoc("ebccce"));
    ki.addDoc(simpleFieldDoc("adbdcd"));
    ki.commit();
    kr = ks.apply(ki);
    ki.close();
    assertEquals(1, kr.getTotalResults());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 45 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestSegmentIndex method testCase4.

/**
 * Matching a SpanElementQuery and a SpanNextQuery
 * Multiple atomic indices
 *
 * <p>Three documents are added in two separate commits
 * (presumably producing more than one atomic index). The query is
 * a segment of the element "e" and the sequence "a" followed by
 * "b"; two matches are expected, both in local document 0.
 *
 * @throws IOException
 *             declared for the index operations (add, commit, close)
 */
@Test
public void testCase4() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();
    ki.addDoc(createFieldDoc1());
    ki.addDoc(createFieldDoc2());
    ki.commit();
    sq = new SpanSegmentQuery(new SpanElementQuery("base", "e"), new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")), new SpanTermQuery(new Term("base", "s:b"))));
    kr = ki.search(sq, (short) 10);
    // The result is evaluated after the index was closed
    ki.close();
    // JUnit expects (message, expected, actual)
    assertEquals("totalResults", 2, kr.getTotalResults());
    // Match #0
    assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID());
    assertEquals("StartPos", 3, kr.getMatch(0).startPos);
    assertEquals("EndPos", 5, kr.getMatch(0).endPos);
    // Match #1
    assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID());
    assertEquals("StartPos", 1, kr.getMatch(1).startPos);
    assertEquals("EndPos", 3, kr.getMatch(1).endPos);
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanSegmentQuery(de.ids_mannheim.korap.query.SpanSegmentQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Aggregations

KrillIndex (de.ids_mannheim.korap.KrillIndex): 321 · Test (org.junit.Test): 310 · Result (de.ids_mannheim.korap.response.Result): 143 · SpanQuery (org.apache.lucene.search.spans.SpanQuery): 132 · Term (org.apache.lucene.index.Term): 93 · SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 84 · Krill (de.ids_mannheim.korap.Krill): 82 · QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 56 · SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 42 · KrillCollection (de.ids_mannheim.korap.KrillCollection): 39 · TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 38 · SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery): 37 · Match (de.ids_mannheim.korap.response.Match): 37 · FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 33 · JsonNode (com.fasterxml.jackson.databind.JsonNode): 28 · DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint): 27 · SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper): 26 · SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 25 · SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery): 20 · SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery): 18