Search in sources :

Example 16 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestSpanExpansionIndex method indexRegexSequence.

/**
 * Runs a sequence query followed by an excluding segment twice against the
 * same index: once with explicit alternations and once with regular
 * expressions. Both variants must yield exactly one match with the same
 * bracketed snippet.
 *
 * @throws Exception propagated from indexing or searching
 */
@Test
public void indexRegexSequence() throws Exception {
    final String expectedSnippet = "... baum [[baumgarten steingarten]] franz ...";

    KrillIndex index = new KrillIndex();
    index.addDoc(createFieldDoc5());
    index.commit();

    QueryBuilder builder = new QueryBuilder("base");

    // Variant 1: explicit alternations.
    // Expected to find [baumgarten steingarten]
    SpanQueryWrapper alternationQuery = builder
            .seq(builder.or("s:baumgarten", "s:steingarten"))
            .append(builder.seg().without(builder.or("s:franz", "s:hans")));
    Result alternationResult = index.search(_newKrill(alternationQuery));
    assertEquals(1L, alternationResult.getTotalResults());
    assertEquals(expectedSnippet, alternationResult.getMatch(0).getSnippetBrackets());

    // Variant 2: the same result should be shown for the regex form.
    SpanQueryWrapper regexQuery = builder
            .seq(builder.re("s:.*garten"))
            .append(builder.seg().without(builder.re("s:.*an.*")));
    Result regexResult = index.search(_newKrill(regexQuery));
    assertEquals(1L, regexResult.getTotalResults());
    assertEquals(expectedSnippet, regexResult.getMatch(0).getSnippetBrackets());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 17 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestSpanExpansionIndex method testExclusionWithMultipleDocs.

/**
 * Expansion with exclusion, searched over an index holding multiple
 * documents.
 *
 * @throws IOException
 *             propagated from indexing or searching
 */
@Test
public void testExclusionWithMultipleDocs() throws IOException {
    KrillIndex index = new KrillIndex();
    // Documents are added in a fixed order; the assertions below rely on it.
    // same doc
    index.addDoc(createFieldDoc0());
    // only not clause
    index.addDoc(createFieldDoc1());
    // only main clause
    index.addDoc(createFieldDoc2());
    index.commit();

    Term mainTerm = new Term("base", "s:e");
    SpanTermQuery mainClause = new SpanTermQuery(mainTerm);
    Term excludedTerm = new Term("base", "s:d");
    SpanTermQuery notClause = new SpanTermQuery(excludedTerm);
    SpanExpansionQuery expansion =
            new SpanExpansionQuery(mainClause, notClause, 2, 3, 0, true);

    // kr is not declared in this method (presumably a field of the test class).
    kr = index.search(expansion, (short) 20);

    // Case: notClause.doc() > firstSpans.doc()
    assertEquals(7, kr.getMatch(0).getStartPos());
    assertEquals(10, kr.getMatch(0).getEndPos());
    assertEquals(7, kr.getMatch(1).getStartPos());
    assertEquals(11, kr.getMatch(1).getEndPos());

    // Case: !hasMoreNotClause
    assertEquals(2, kr.getMatch(4).getLocalDocID());
    assertEquals(1, kr.getMatch(4).getStartPos());
    assertEquals(4, kr.getMatch(4).getEndPos());
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Example 18 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestSpanExpansionIndex method testExpansionOverStart.

/**
 * Expansion beyond the document boundaries.
 * Over the start: the match start is cut to 0.
 * TODO: over the end => to be handled in rendering process
 *
 * @throws IOException
 *             propagated from indexing or searching
 */
@Test
public void testExpansionOverStart() throws IOException {
    KrillIndex index = new KrillIndex();
    index.addDoc(createFieldDoc0());
    index.commit();

    SpanTermQuery termQuery = new SpanTermQuery(new Term("base", "s:e"));

    // Left expansion precedes position 0.
    SpanExpansionQuery leftExpansion =
            new SpanExpansionQuery(termQuery, 2, 2, -1, true);
    // kr is not declared in this method (presumably a field of the test class).
    kr = index.search(leftExpansion, (short) 10);
    assertEquals(3L, kr.getTotalResults());
    assertEquals(2, kr.getMatch(0).getStartPos());
    assertEquals(5, kr.getMatch(0).getEndPos());

    // Right expansion exceeds the end position.
    SpanExpansionQuery rightExpansion =
            new SpanExpansionQuery(termQuery, 3, 3, 0, true);
    kr = index.search(rightExpansion, (short) 10);
    assertEquals(4L, kr.getTotalResults());
    assertEquals(7, kr.getMatch(2).getStartPos());
    assertEquals(11, kr.getMatch(2).getEndPos());
    assertEquals(8, kr.getMatch(3).getStartPos());
    assertEquals(12, kr.getMatch(3).getEndPos());

    // Debugging aid: loop over kr.getMatches() and print each match's
    // getStartPos() + "," + getEndPos() (optionally getSnippetBrackets()).
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Example 19 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestSpanExpansionIndex method testLeftExpansionRedundantMatches.

/**
 * Left expansion that reaches past the start of the document; redundant
 * matches should be omitted from the result.
 *
 * @throws IOException propagated from indexing or searching
 */
@Test
public void testLeftExpansionRedundantMatches() throws IOException {
    KrillIndex index = new KrillIndex();
    // NOTE(review): the fourth token looks like a mis-encoded character
    // (possibly "ü") — confirm against the upstream file. The literal is
    // kept unchanged here.
    index.addDoc(simpleFieldDoc("A d F ΓΌ d T F u d m", " "));
    index.commit();

    Term anchorTerm = new Term("base", "s:d");
    SpanTermQuery anchor = new SpanTermQuery(anchorTerm);
    SpanExpansionQuery leftExpansion =
            new SpanExpansionQuery(anchor, 0, 6, -1, true);
    Result result = index.search(leftExpansion, (short) 20);

    // Debugging aid: loop over result.getMatches() and print each match's
    // getStartPos() + "," + getEndPos() + " " + getSnippetBrackets().

    Match sixthMatch = result.getMatch(5);
    assertEquals(2, sixthMatch.getStartPos());
    assertEquals(9, sixthMatch.getEndPos());
    assertEquals(14, result.getTotalResults());
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Match(de.ids_mannheim.korap.response.Match) Test(org.junit.Test)

Example 20 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestUnorderedDistanceIndex method testCase7.

/**
 * Nested distance queries: the query returned by {@code createQuery} is used
 * as the first operand of an outer {@link SpanDistanceQuery} whose second
 * operand is a term query for {@code s:e}.
 *
 * @throws IOException propagated from indexing or searching
 */
@Test
public void testCase7() throws IOException {
    // ki and kr are not declared in this method (presumably fields of the
    // test class).
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.addDoc(createFieldDoc1());
    ki.commit();
    SpanQuery sq = createQuery("s:c", "s:d", 1, 2, false);
    SpanQuery sq2 = new SpanDistanceQuery(sq, new SpanTermQuery(new Term("base", "s:e")), new DistanceConstraint(1, 2, true, false), true);
    kr = ki.search(sq2, (short) 10);
    // Expected value goes first so that a failure is reported as
    // "expected:<3> but was:<actual>" rather than with the roles swapped.
    assertEquals(3, kr.getTotalResults());
    assertEquals(5, kr.getMatch(0).getStartPos());
    assertEquals(9, kr.getMatch(0).getEndPos());
    assertEquals(1, kr.getMatch(1).getLocalDocID());
    assertEquals(0, kr.getMatch(1).getStartPos());
    assertEquals(3, kr.getMatch(1).getEndPos());
    assertEquals(0, kr.getMatch(2).getStartPos());
    assertEquals(4, kr.getMatch(2).getEndPos());
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanDistanceQuery(de.ids_mannheim.korap.query.SpanDistanceQuery) DistanceConstraint(de.ids_mannheim.korap.query.DistanceConstraint) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Aggregations

KrillIndex (de.ids_mannheim.korap.KrillIndex)320 Test (org.junit.Test)309 Result (de.ids_mannheim.korap.response.Result)143 SpanQuery (org.apache.lucene.search.spans.SpanQuery)132 Term (org.apache.lucene.index.Term)93 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)84 Krill (de.ids_mannheim.korap.Krill)82 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)56 SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery)42 KrillCollection (de.ids_mannheim.korap.KrillCollection)39 TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString)38 SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery)37 Match (de.ids_mannheim.korap.response.Match)37 FieldDocument (de.ids_mannheim.korap.index.FieldDocument)33 JsonNode (com.fasterxml.jackson.databind.JsonNode)28 DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint)27 SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)26 SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery)25 SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery)20 SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery)18