Search in sources :

Example 6 with SpanExpansionQuery

use of de.ids_mannheim.korap.query.SpanExpansionQuery in project Krill by KorAP.

the class TestSpanExpansionIndex method testCase4.

/**
 * Left expansion with an exclusion (not) query, including matches in
 * which nothing is expanded.
 *
 * The anchor term is tt/p:NN, the excluded term is tt/p:ADJA and the
 * expansion range is 0 to 2. Class number 1 is handed to the query and
 * its span is inspected through getStartPos(1) / getEndPos(1).
 *
 * @throws IOException
 */
@Test
public void testCase4() throws IOException {
    SpanTermQuery anchorQuery = new SpanTermQuery(new Term("tokens", "tt/p:NN"));
    SpanTermQuery excludedQuery = new SpanTermQuery(new Term("tokens", "tt/p:ADJA"));
    byte classNumber = 1;
    // Direction -1 is the left expansion used throughout these tests.
    SpanExpansionQuery expansion = new SpanExpansionQuery(anchorQuery, excludedQuery, 0, 2, -1, classNumber, true);
    kr = ki.search(expansion, (short) 10);

    // Each row: { match index, start, end [, class-1 start, class-1 end] }.
    // Rows with only three entries check the match span alone.
    final int[][] expected = {
        { 0, 6, 7, 6, 6 },
        { 4, 12, 13 },
        { 5, 12, 15, 12, 14 },
        { 6, 13, 15, 13, 14 }
    };
    for (int[] row : expected) {
        assertEquals(row[1], kr.getMatch(row[0]).getStartPos());
        assertEquals(row[2], kr.getMatch(row[0]).getEndPos());
        if (row.length > 3) {
            assertEquals(row[3], kr.getMatch(row[0]).getStartPos(1));
            assertEquals(row[4], kr.getMatch(row[0]).getEndPos(1));
        }
    }
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) Test(org.junit.Test)

Example 7 with SpanExpansionQuery

use of de.ids_mannheim.korap.query.SpanExpansionQuery in project Krill by KorAP.

the class TestSpanExpansionIndex method testQueryRewriteBug.

/**
 * Regression test for a query rewrite bug (see
 * /queries/bugs/repetition_group_rewrite).
 *
 * A regexp query wrapped as a span query is first searched on its own,
 * then inside a right expansion of exactly one position, and finally
 * inside a repetition of exactly two such expansions.
 *
 * Warning: the queries are not guarded by a {@code <base/s=t>} span!
 *
 * @throws IOException
 */
@Test
public void testQueryRewriteBug() throws IOException {
    KrillIndex index = new KrillIndex();
    // Only document 0 (ceccecdeec) is indexed. The fixtures
    // createFieldDoc1() (bbccdd, only the not clause) and
    // createFieldDoc2() (beccea, only the main clause) are left out.
    index.addDoc(createFieldDoc0());
    index.commit();

    SpanMultiTermQueryWrapper<RegexpQuery> regexSpan = new SpanMultiTermQueryWrapper<RegexpQuery>(
            new RegexpQuery(new Term("base", "s:[ac]"), RegExp.ALL));
    SpanExpansionQuery expansion = new SpanExpansionQuery(regexSpan, 1, 1, 1, true);
    // Resulting query tree:
    //   spanRepetition(
    //     spanExpansion(
    //       SpanMultiTermQueryWrapper(base:/s:[ac]/),
    //       []{1, 1},
    //       right
    //     ){2,2}
    //   )
    SpanRepetitionQuery repetition = new SpanRepetitionQuery(expansion, 2, 2, true);

    // 1) The plain regexp span: { start, end } per match, in result order.
    final int[][] plainSpans = { { 0, 1 }, { 2, 3 }, { 3, 4 }, { 5, 6 }, { 9, 10 } };
    kr = index.search(regexSpan, (short) 20);
    assertEquals(5, kr.getTotalResults());
    for (int i = 0; i < plainSpans.length; i++) {
        assertEquals(plainSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(plainSpans[i][1], kr.getMatch(i).getEndPos());
    }

    // 2) The expansion: same starts, every end one position further right.
    final int[][] expandedSpans = { { 0, 2 }, { 2, 4 }, { 3, 5 }, { 5, 7 }, { 9, 11 } };
    kr = index.search(expansion, (short) 20);
    assertEquals(5, kr.getTotalResults());
    for (int i = 0; i < expandedSpans.length; i++) {
        assertEquals(expandedSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(expandedSpans[i][1], kr.getMatch(i).getEndPos());
    }

    // 3) The repetition of two expansions.
    kr = index.search(repetition, (short) 20);
    assertEquals("[[cecc]]ecdeec", kr.getMatch(0).getSnippetBrackets());
    assertEquals("cec[[cecd]]eec", kr.getMatch(1).getSnippetBrackets());
    assertEquals(2L, kr.getTotalResults());
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) RegexpQuery(org.apache.lucene.search.RegexpQuery) Test(org.junit.Test)

Example 8 with SpanExpansionQuery

use of de.ids_mannheim.korap.query.SpanExpansionQuery in project Krill by KorAP.

the class TestSpanExpansionIndex method testBugRegexExpandLeftNoMoreSpan.

/**
 * Left expansion (0 to 1 positions) of the term s:a with a regexp
 * exclusion query s:[bc], run against the createFieldDoc6() fixture.
 * The search is expected to yield nine matches.
 *
 * @throws IOException
 */
@Test
public void testBugRegexExpandLeftNoMoreSpan() throws IOException {
    KrillIndex index = new KrillIndex();
    index.addDoc(createFieldDoc6());
    index.commit();

    SpanTermQuery anchorQuery = new SpanTermQuery(new Term("base", "s:a"));
    SpanMultiTermQueryWrapper<RegexpQuery> excludedQuery = new SpanMultiTermQueryWrapper<RegexpQuery>(
            new RegexpQuery(new Term("base", "s:[bc]"), RegExp.ALL));

    // Direction -1: expand to the left of the anchor.
    byte classNumber = 1;
    SpanExpansionQuery leftExpansion = new SpanExpansionQuery(anchorQuery, excludedQuery, 0, 1, -1, classNumber, true);

    kr = index.search(leftExpansion, (short) 20);
    assertEquals(9, kr.getMatches().size());
}
Also used : SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) RegexpQuery(org.apache.lucene.search.RegexpQuery) Test(org.junit.Test)

Example 9 with SpanExpansionQuery

use of de.ids_mannheim.korap.query.SpanExpansionQuery in project Krill by KorAP.

the class TestSpanExpansionIndex method testCase1.

/**
 * Left and right expansions of the term s:des.
 *
 * First expands 0 to 2 positions to the left, then 3 to 4 positions to
 * the right, checking the start and end positions of the leading matches.
 *
 * @throws IOException
 */
@Test
public void testCase1() throws IOException {
    SpanTermQuery anchorQuery = new SpanTermQuery(new Term("tokens", "s:des"));

    // Left expansion (direction -1).
    SpanExpansionQuery leftExpansion = new SpanExpansionQuery(anchorQuery, 0, 2, -1, true);
    kr = ki.search(leftExpansion, (short) 10);
    // NOTE: a total-result check (expected 69) is disabled in this test.
    // { start, end } per match, in result order.
    final int[][] leftSpans = { { 5, 8 }, { 6, 8 }, { 7, 8 } };
    for (int i = 0; i < leftSpans.length; i++) {
        assertEquals(leftSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(leftSpans[i][1], kr.getMatch(i).getEndPos());
    }

    // Right expansion (direction 0).
    SpanExpansionQuery rightExpansion = new SpanExpansionQuery(anchorQuery, 3, 4, 0, true);
    kr = ki.search(rightExpansion, (short) 10);
    final int[][] rightSpans = { { 7, 11 }, { 7, 12 }, { 156, 160 }, { 156, 161 } };
    for (int i = 0; i < rightSpans.length; i++) {
        assertEquals(rightSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(rightSpans[i][1], kr.getMatch(i).getEndPos());
    }
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) Test(org.junit.Test)

Aggregations

SpanExpansionQuery (de.ids_mannheim.korap.query.SpanExpansionQuery)9 Term (org.apache.lucene.index.Term)8 Test (org.junit.Test)8 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)7 KrillIndex (de.ids_mannheim.korap.KrillIndex)4 RegexpQuery (org.apache.lucene.search.RegexpQuery)2 SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper)2 DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint)1 SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery)1 SpanRepetitionQuery (de.ids_mannheim.korap.query.SpanRepetitionQuery)1 SpanQuery (org.apache.lucene.search.spans.SpanQuery)1