Use of de.ids_mannheim.korap.KrillIndex in the project Krill by KorAP.
Class TestRepetitionIndex, method fuzzingRepetitionBug.
/**
 * Fuzzes random corpora to hunt for unexpected, failing constellations
 * of the repetition query {@code b c{2,3} d}.
 * <p>
 * For every trial a fresh index is filled with random documents. The
 * number of hits reported by the query is compared with the number of
 * matches the reference regular expression {@code bccc?d} finds in the
 * same document strings. A mismatch is printed to {@code System.err}
 * when its textual configuration is shorter than the previously printed
 * one; after each report {@code minLength} and {@code maxDocs} are
 * decremented so that later failures tend to be less complex.
 * <p>
 * The method is not annotated with {@code @Test}; presumably it is
 * meant to be invoked by hand while investigating a bug.
 *
 * @throws IOException propagated from the index operations
 * @throws QueryException propagated from building the query
 */
public void fuzzingRepetitionBug() throws IOException, QueryException {
    // NOTE(review): "c" is listed twice, presumably so that the fuzzer
    // picks it more often - depends on simpleFuzzyFieldDoc, confirm there.
    List<String> chars = Arrays.asList("a", "b", "c", "c", "d", "e");

    // Reference matcher the query results are checked against
    Pattern p = Pattern.compile("bccc?d");

    QueryBuilder qb = new QueryBuilder("base");
    // b c{2,3} d
    SpanQuery sq = qb.seq(qb.seg("s:b")).append(qb.repeat(qb.seg("s:c"), 2, 3)).append(qb.seg("s:d")).toQuery();
    Krill ks = new Krill(sq);
    // Expected value first, so a failure message reads the right way round
    assertEquals("spanNext(spanNext(base:s:b, spanRepetition(base:s:c{2,3})), base:s:d)", ks.getSpanQuery().toString());

    String lastFailureConf = "";
    int minLength = 6;
    int maxLength = 22;
    int maxDocs = 8;

    // Create fuzzy corpora (100000 trials)
    for (int x = 0; x < 100000; x++) {
        KrillIndex ki = new KrillIndex();
        List<String> list = new ArrayList<String>();
        int c = 0;
        Result kr;
        try {
            // The random bound is re-rolled on every pass of the loop,
            // so a corpus holds between 0 and maxDocs - 1 documents.
            for (int i = 0; i < (int) (Math.random() * maxDocs); i++) {
                FieldDocument testDoc = simpleFuzzyFieldDoc(chars, minLength, maxLength);
                String testString = testDoc.doc.getField("base").stringValue();
                Matcher m = p.matcher(testString);
                list.add(testString);
                // Count the expected hits for this document
                while (m.find()) {
                    c++;
                }
                ki.addDoc(testDoc);
            }
            ki.commit();
            kr = ks.apply(ki);
        }
        finally {
            // One index is created per trial; release it so that the
            // trials do not pile up open indices.
            ki.close();
        }

        // Spit out the corpus configuration if query and regex disagree
        if (c != kr.getTotalResults()) {
            String failureConf = c + ":" + kr.getTotalResults() + " " + list.toString();
            // Only report failures that are simpler than the last one
            if (lastFailureConf.length() == 0 || failureConf.length() < lastFailureConf.length()) {
                System.err.println(failureConf);
                lastFailureConf = failureConf;
                minLength--;
                maxDocs--;
            }
        }
    }
}
Use of de.ids_mannheim.korap.KrillIndex in the project Krill by KorAP.
Class TestRepetitionIndex, method testRepetitionInSequences.
/**
 * Checks a bounded repetition ({@code c{1,2}}) that is embedded in
 * sequence queries, first on a single document and then after a
 * second document has been added to the same index.
 *
 * @throws IOException propagated from the index operations
 */
@Test
public void testRepetitionInSequences() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();

    // ec{1,2}
    SpanTermQuery termE = new SpanTermQuery(new Term("base", "s:e"));
    SpanTermQuery termC = new SpanTermQuery(new Term("base", "s:c"));
    SpanRepetitionQuery oneOrTwoC = new SpanRepetitionQuery(termC, 1, 2, true);
    SpanQuery eThenC = new SpanNextQuery(termE, oneOrTwoC);

    kr = ki.search(eThenC, (short) 10);
    assertEquals(3L, kr.getTotalResults());

    // Expected {start, end} per match: 1-3, 1-4, 4-6
    int[][] spans = { { 1, 3 }, { 1, 4 }, { 4, 6 } };
    for (int i = 0; i < spans.length; i++) {
        assertEquals(spans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(spans[i][1], kr.getMatch(i).getEndPos());
    }

    // ec{1,2}d
    SpanTermQuery termD = new SpanTermQuery(new Term("base", "s:d"));
    SpanQuery eThenCThenD = new SpanNextQuery(eThenC, termD);

    kr = ki.search(eThenCThenD, (short) 10);
    assertEquals(2L, kr.getTotalResults());

    // Expected {start, end} per match: 1-5, 4-7
    int[][] spansWithD = { { 1, 5 }, { 4, 7 } };
    for (int i = 0; i < spansWithD.length; i++) {
        assertEquals(spansWithD[i][0], kr.getMatch(i).startPos);
        assertEquals(spansWithD[i][1], kr.getMatch(i).endPos);
    }

    // Multiple documents: the same query after adding a second document
    ki.addDoc(createFieldDoc1());
    ki.commit();
    kr = ki.search(eThenCThenD, (short) 10);
    assertEquals(5L, kr.getTotalResults());
}
Use of de.ids_mannheim.korap.KrillIndex in the project Krill by KorAP.
Class TestRepetitionIndex, method testCase2.
/**
 * Skip to.
 * <p>
 * Searches {@code e c{2,2}} in an index of four documents and expects
 * two hits, the second of them in the document with local ID 3.
 * NOTE(review): the original note only says "Skip to" - presumably the
 * case targets skipping between documents; confirm.
 *
 * @throws IOException propagated from the index operations
 */
@Test
public void testCase2() throws IOException {
    ki = new KrillIndex();
    // Documents are deliberately added in the order 0, 3, 2, 1
    ki.addDoc(createFieldDoc0());
    ki.addDoc(createFieldDoc3());
    ki.addDoc(createFieldDoc2());
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // Disabled check for the bare repetition c{2,2}:
    // sq = new SpanRepetitionQuery(
    // new SpanTermQuery(new Term("base", "s:c")), 2, 2, true);
    // kr = ki.search(sq, (short) 10);
    // // doc1 2-4, 3-5, 4-6
    // assertEquals((long) 6, kr.getTotalResults());

    // ec{2,2}
    SpanTermQuery termE = new SpanTermQuery(new Term("base", "s:e"));
    SpanTermQuery termC = new SpanTermQuery(new Term("base", "s:c"));
    SpanRepetitionQuery exactlyTwoC = new SpanRepetitionQuery(termC, 2, 2, true);
    SpanQuery eThenTwoC = new SpanNextQuery(termE, exactlyTwoC);

    kr = ki.search(eThenTwoC, (short) 10);
    assertEquals(2L, kr.getTotalResults());
    assertEquals(3, kr.getMatch(1).getLocalDocID());
}
Use of de.ids_mannheim.korap.KrillIndex in the project Krill by KorAP.
Class TestRepetitionIndex, method testRepetitionSnippetBug1.
/**
 * Regression test for the repetition query {@code b c{2,3} d}.
 * <p>
 * Runs the query against one hand-written document and against two
 * small corpora whose comments mark them as fuzzed failures, and checks
 * the total number of hits for each.
 *
 * @throws IOException propagated from the index operations
 * @throws QueryException propagated from building the query
 */
@Test
public void testRepetitionSnippetBug1() throws IOException, QueryException {
    // Stands in for a query of the form: Der [corenlp/p=ADJA]{2,3} Baum
    QueryBuilder qb = new QueryBuilder("base");
    // b c{2,3} d
    SpanQuery sq = qb.seq(qb.seg("s:b")).append(qb.repeat(qb.seg("s:c"), 2, 3)).append(qb.seg("s:d")).toQuery();
    Krill ks = new Krill(sq);
    // Expected value first, so a failure message reads the right way round
    assertEquals("spanNext(spanNext(base:s:b, spanRepetition(base:s:c{2,3})), base:s:d)", ks.getSpanQuery().toString());

    // simpleDocTest
    KrillIndex ki = new KrillIndex();
    ki.addDoc(simpleFieldDoc("abccde"));
    ki.commit();
    Result kr = ks.apply(ki);
    assertEquals(1, kr.getTotalResults());

    // The corpora below were found with:
    // fuzzingRepetitionBug();

    // First fuzzed failure (0 vs 1)
    // NOTE(review): presumably "expected vs. formerly returned" - confirm
    ki = new KrillIndex();
    // 0
    ki.addDoc(simpleFieldDoc("cccd"));
    // 1
    ki.addDoc(simpleFieldDoc("bccccccaeae"));
    // 2
    ki.addDoc(simpleFieldDoc("cbcedb"));
    ki.commit();
    kr = ks.apply(ki);
    assertEquals(0, kr.getTotalResults());

    // Third fuzzed failure (1 vs 2)
    ki = new KrillIndex();
    ki.addDoc(simpleFieldDoc("bccdcb"));
    ki.addDoc(simpleFieldDoc("ebccce"));
    ki.addDoc(simpleFieldDoc("adbdcd"));
    ki.commit();
    kr = ks.apply(ki);
    assertEquals(1, kr.getTotalResults());
}
Use of de.ids_mannheim.korap.KrillIndex in the project Krill by KorAP.
Class TestSegmentIndex, method testCase4.
/**
 * Matching a SpanElementQuery and a SpanNextQuery.
 * Multiple atomic indices.
 * <p>
 * The index is committed twice (one document, then two more) before a
 * SpanSegmentQuery combining the element {@code e} with the sequence
 * {@code s:a s:b} is searched. Two hits are expected, both with local
 * document ID 0.
 *
 * @throws IOException propagated from the index operations
 */
@Test
public void testCase4() throws IOException {
    // log.trace("Testcase4");
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();
    // Second commit after adding two further documents
    ki.addDoc(createFieldDoc1());
    ki.addDoc(createFieldDoc2());
    ki.commit();

    sq = new SpanSegmentQuery(new SpanElementQuery("base", "e"), new SpanNextQuery(new SpanTermQuery(new Term("base", "s:a")), new SpanTermQuery(new Term("base", "s:b"))));
    kr = ki.search(sq, (short) 10);
    // The result is inspected after the index has been closed
    ki.close();

    // Expected value first, so a failure message reads the right way round
    assertEquals("totalResults", 2, kr.getTotalResults());

    // Match #0
    assertEquals("doc-number", 0, kr.getMatch(0).getLocalDocID());
    assertEquals("StartPos", 3, kr.getMatch(0).startPos);
    assertEquals("EndPos", 5, kr.getMatch(0).endPos);

    // Match #1
    assertEquals("doc-number", 0, kr.getMatch(1).getLocalDocID());
    assertEquals("StartPos", 1, kr.getMatch(1).startPos);
    assertEquals("EndPos", 3, kr.getMatch(1).endPos);
}
Aggregations