Search in sources :

Example 6 with SpanRepetitionQuery

use of de.ids_mannheim.korap.query.SpanRepetitionQuery in project Krill by KorAP.

the class TestRepetitionIndex method testRepetitionInSequences.

/**
 * Searches for a repetition of "s:c" that directly follows "s:e"
 * (ec{1,2}), then extends the sequence with a trailing "s:d"
 * (ec{1,2}d), and finally repeats the extended search after a second
 * document has been added to the index.
 *
 * @throws IOException
 */
@Test
public void testRepetitionInSequences() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();

    // ec{1,2}
    SpanTermQuery leadingE = new SpanTermQuery(new Term("base", "s:e"));
    SpanRepetitionQuery repeatedC = new SpanRepetitionQuery(
            new SpanTermQuery(new Term("base", "s:c")), 1, 2, true);
    SpanQuery sq = new SpanNextQuery(leadingE, repeatedC);
    kr = ki.search(sq, (short) 10);

    // Expected spans as {start, end}: 1-3, 1-4, 4-6
    int[][] sequenceSpans = { { 1, 3 }, { 1, 4 }, { 4, 6 } };
    assertEquals(3L, kr.getTotalResults());
    for (int i = 0; i < sequenceSpans.length; i++) {
        assertEquals(sequenceSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(sequenceSpans[i][1], kr.getMatch(i).getEndPos());
    }

    // ec{1,2}d
    SpanQuery sq2 = new SpanNextQuery(sq,
            new SpanTermQuery(new Term("base", "s:d")));
    kr = ki.search(sq2, (short) 10);

    // Expected spans as {start, end}: 1-5, 4-7
    int[][] extendedSpans = { { 1, 5 }, { 4, 7 } };
    assertEquals(2L, kr.getTotalResults());
    for (int i = 0; i < extendedSpans.length; i++) {
        assertEquals(extendedSpans[i][0], kr.getMatch(i).startPos);
        assertEquals(extendedSpans[i][1], kr.getMatch(i).endPos);
    }

    // Multiple documents
    ki.addDoc(createFieldDoc1());
    ki.commit();
    kr = ki.search(sq2, (short) 10);
    assertEquals(5L, kr.getTotalResults());
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Example 7 with SpanRepetitionQuery

use of de.ids_mannheim.korap.query.SpanRepetitionQuery in project Krill by KorAP.

the class TestRepetitionIndex method testCase4.

/**
 * Counts the matches of two repetition queries over "s:c" with
 * different minimum repetitions against a single indexed document.
 *
 * @throws IOException
 */
@Test
public void testCase4() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    SpanQuery sq;

    // c{1,3}
    sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("base", "s:c")), 1, 3, true);
    kr = ki.search(sq, (short) 10);
    // 2-3, 2-4, 2-5, 3-4, 3-5, 3-6, 4-5, 4-6, 5-6, 7-8
    assertEquals((long) 10, kr.getTotalResults());

    // c{2,3}
    sq = new SpanRepetitionQuery(new SpanTermQuery(new Term("base", "s:c")), 2, 3, true);
    kr = ki.search(sq, (short) 10);
    // 2-4, 2-5, 3-5, 3-6, 4-6
    assertEquals((long) 5, kr.getTotalResults());
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Example 8 with SpanRepetitionQuery

use of de.ids_mannheim.korap.query.SpanRepetitionQuery in project Krill by KorAP.

the class TestRepetitionIndex method testMinZeroRepetition.

/**
 * Checks the plain term query for "s:e" and then verifies that building
 * a repetition query with a minimum repetition of 0 is rejected with an
 * {@link IllegalArgumentException} carrying the expected message.
 *
 * @throws IOException
 */
@Test
public void testMinZeroRepetition() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();

    SpanQuery sq = new SpanTermQuery(new Term("base", "s:e"));
    kr = ki.search(sq, (short) 10);
    assertEquals((long) 4, kr.getTotalResults());
    assertEquals(1, kr.getMatch(0).getStartPos());
    assertEquals(2, kr.getMatch(0).getEndPos());
    assertEquals(4, kr.getMatch(1).getStartPos());
    assertEquals(5, kr.getMatch(1).getEndPos());
    assertEquals(7, kr.getMatch(2).getStartPos());
    assertEquals(8, kr.getMatch(2).getEndPos());
    assertEquals(8, kr.getMatch(3).getStartPos());
    assertEquals(9, kr.getMatch(3).getEndPos());

    // The message is captured and asserted outside the try block, so the
    // test fails (message stays null) when no exception is thrown at all.
    String message = null;
    try {
        new SpanNextQuery(sq, new SpanRepetitionQuery(new SpanTermQuery(new Term("base", "s:c")), 0, 1, true));
    } catch (IllegalArgumentException e) {
        message = e.getMessage();
    }
    assertEquals("Minimum repetition must not lower than 1.", message);
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Example 9 with SpanRepetitionQuery

use of de.ids_mannheim.korap.query.SpanRepetitionQuery in project Krill by KorAP.

the class TestRepetitionIndex method testTermQuery.

/**
 * Runs a repetition query that consists of the quantifier only
 * (c{1,2}) and checks the number and positions of all matches.
 *
 * @throws IOException
 */
@Test
public void testTermQuery() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();

    // Quantifier only: c{1,2}
    SpanTermQuery termC = new SpanTermQuery(new Term("base", "s:c"));
    SpanQuery sq = new SpanRepetitionQuery(termC, 1, 2, true);
    kr = ki.search(sq, (short) 10);

    // Expected spans as {start, end}: 0-1, 2-3, 2-4, 3-4, 5-6
    int[][] expectedSpans = { { 0, 1 }, { 2, 3 }, { 2, 4 }, { 3, 4 }, { 5, 6 } };
    assertEquals(5L, kr.getTotalResults());
    for (int i = 0; i < expectedSpans.length; i++) {
        assertEquals(expectedSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(expectedSpans[i][1], kr.getMatch(i).getEndPos());
    }
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Example 10 with SpanRepetitionQuery

use of de.ids_mannheim.korap.query.SpanRepetitionQuery in project Krill by KorAP.

the class TestSpanExpansionIndex method testQueryRewriteBug.

/**
 * Regression test for a query rewrite bug: a repetition over an
 * expansion over a multi-term (regular expression) query.
 *
 * Warning: This is not armoured by {@code <base/s=t>}!
 *
 * @throws IOException
 */
@Test
public void testQueryRewriteBug() throws IOException {
    KrillIndex index = new KrillIndex();
    // ceccecdeec
    index.addDoc(createFieldDoc0());
    /*
        index.addDoc(createFieldDoc1()); // bbccdd || only not clause
        index.addDoc(createFieldDoc2()); // beccea | only main clause
        */
    index.commit();

    // See /queries/bugs/repetition_group_rewrite
    //
    // spanRepetition(
    //   spanExpansion(
    //     SpanMultiTermQueryWrapper(base:/s:[ac]/),
    //     []{1, 1},
    //     right
    //   ){2,2}
    // )
    RegexpQuery regex = new RegexpQuery(new Term("base", "s:[ac]"), RegExp.ALL);
    SpanMultiTermQueryWrapper<RegexpQuery> wrapped =
            new SpanMultiTermQueryWrapper<RegexpQuery>(regex);
    SpanExpansionQuery expanded = new SpanExpansionQuery(wrapped, 1, 1, 1, true);
    SpanRepetitionQuery repeated = new SpanRepetitionQuery(expanded, 2, 2, true);

    // Innermost query on its own; expected spans as {start, end}
    int[][] wrappedSpans = { { 0, 1 }, { 2, 3 }, { 3, 4 }, { 5, 6 }, { 9, 10 } };
    kr = index.search(wrapped, (short) 20);
    assertEquals(5, kr.getTotalResults());
    for (int i = 0; i < wrappedSpans.length; i++) {
        assertEquals(wrappedSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(wrappedSpans[i][1], kr.getMatch(i).getEndPos());
    }

    // Expansion query; expected spans as {start, end}
    int[][] expandedSpans = { { 0, 2 }, { 2, 4 }, { 3, 5 }, { 5, 7 }, { 9, 11 } };
    kr = index.search(expanded, (short) 20);
    assertEquals(5, kr.getTotalResults());
    for (int i = 0; i < expandedSpans.length; i++) {
        assertEquals(expandedSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(expandedSpans[i][1], kr.getMatch(i).getEndPos());
    }

    // Full repetition query
    kr = index.search(repeated, (short) 20);
    assertEquals("[[cecc]]ecdeec", kr.getMatch(0).getSnippetBrackets());
    assertEquals("cec[[cecd]]eec", kr.getMatch(1).getSnippetBrackets());
    assertEquals(2L, kr.getTotalResults());
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) RegexpQuery(org.apache.lucene.search.RegexpQuery) Test(org.junit.Test)

Aggregations

SpanRepetitionQuery (de.ids_mannheim.korap.query.SpanRepetitionQuery): 10; Term (org.apache.lucene.index.Term): 10; Test (org.junit.Test): 10; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 9; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 9; KrillIndex (de.ids_mannheim.korap.KrillIndex): 8; SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery): 6; Krill (de.ids_mannheim.korap.Krill): 2; SpanExpansionQuery (de.ids_mannheim.korap.query.SpanExpansionQuery): 1; RegexpQuery (org.apache.lucene.search.RegexpQuery): 1; SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper): 1; SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery): 1