Search in sources :

Example 76 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestFieldDocument method indexArbitraryMetaDataSorted.

/**
 * Indexes a single document and requests two named metadata fields
 * ("titel" and "datum") for it.
 *
 * The assertions check that the JSON response lists exactly these two
 * fields, in the order they were requested, with their type, key and value,
 * and that no third field is present.
 */
@Test
public void indexArbitraryMetaDataSorted() throws Exception {
    String json = createDocString1();
    KrillIndex ki = new KrillIndex();
    ki.addDoc(json);
    ki.commit();

    // Fields to retrieve; the response is expected to follow this order.
    ArrayList<String> hs = new ArrayList<String>();
    hs.add("titel");
    hs.add("datum");

    // NOTE(review): "aa/bb/cc" is presumably the text sigle of the document
    // produced by createDocString1() - confirm against that helper.
    JsonNode res = ki.getFields("aa/bb/cc", hs).toJsonNode();

    assertEquals("type:text", res.at("/document/fields/0/type").asText());
    assertEquals("titel", res.at("/document/fields/0/key").asText());
    assertEquals("Der alte Baum", res.at("/document/fields/0/value").asText());

    assertEquals("type:date", res.at("/document/fields/1/type").asText());
    assertEquals("datum", res.at("/document/fields/1/key").asText());
    assertEquals("2018-04-03", res.at("/document/fields/1/value").asText());

    // Only the requested fields may be returned.
    assertTrue(res.at("/document/fields/2").isMissingNode());
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Example 77 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestFieldDocument method indexExample2.

/**
 * Indexes a small JSON document with three tokens and checks that its
 * metadata can be read back both from the returned {@code FieldDocument}
 * and from a search match on the token "s:b".
 */
@Test
public void indexExample2() throws Exception {
    String json = "{"
            + "  \"fields\" : ["
            + "    { "
            + "      \"primaryData\" : \"abc\""
            + "    },"
            + "    {"
            + "      \"name\" : \"tokens\","
            + "      \"data\" : ["
            + "         [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"],"
            + "         [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ],"
            + "         [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]"
            + "      ]"
            + "    }"
            + "  ],"
            + "  \"corpusID\"  : \"WPD\","
            + "  \"ID\"        : \"WPD-AAA-00001\","
            + "  \"textClass\" : \"music entertainment\","
            + "  \"author\"    : \"Peter Frankenfeld\","
            + "  \"pubDate\"   : 20130617,"
            + "  \"title\"     : \"Wikipedia\","
            + "  \"subTitle\"  : \"Die freie Enzyklopädie\","
            + "  \"pubPlace\"  : \"Bochum\""
            + "}";
    KrillIndex ki = new KrillIndex();
    FieldDocument fd = ki.addDoc(json);
    ki.commit();

    // Metadata as seen on the document returned by addDoc().
    // JUnit convention: expected value first, actual value second.
    assertEquals("abc", fd.getPrimaryData());
    assertEquals("WPD", fd.getCorpusID());
    assertEquals("WPD-AAA-00001", fd.getID());
    assertEquals("music entertainment", fd.getFieldValue("textClass"));
    assertEquals("Peter Frankenfeld", fd.getFieldValue("author"));
    assertEquals("Wikipedia", fd.getFieldValue("title"));
    assertEquals("Die freie Enzyklopädie", fd.getFieldValue("subTitle"));
    assertEquals("Bochum", fd.getFieldValue("pubPlace"));
    assertEquals("2013-06-17", fd.getFieldValueAsDate("pubDate").toDisplay());

    // Search for "s:b" wrapped in class number 3 and inspect the first match.
    QueryBuilder kq = new QueryBuilder("tokens");
    Result kr = ki.search((SpanQuery) kq.seq(kq.nr(3, kq.seg("s:b"))).toQuery());
    Match km = kr.getMatch(0);

    // The same metadata must be available on the match.
    assertEquals("abc", km.getPrimaryData());
    assertEquals("WPD", km.getCorpusID());
    assertEquals("WPD-AAA-00001", km.getDocID());
    assertEquals("music entertainment", km.getFieldValue("textClass"));
    assertEquals("Peter Frankenfeld", km.getFieldValue("author"));
    assertEquals("Wikipedia", km.getFieldValue("title"));
    assertEquals("Die freie Enzyklopädie", km.getFieldValue("subTitle"));
    assertEquals("Bochum", km.getFieldValue("pubPlace"));
    assertEquals("2013-06-17", km.getFieldValueAsDate("pubDate").toDisplay());
    assertEquals("a[[{3:b}]]c", km.getSnippetBrackets());
}
Also used : QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Match(de.ids_mannheim.korap.response.Match) Test(org.junit.Test)

Example 78 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestFieldDocument method indexUpsert.

/**
 * Exercises {@code KrillIndex.upsertDoc}: upserting a new text sigle adds a
 * document, while upserting an already indexed text sigle replaces its
 * content without increasing the document count.
 *
 * Also covers the InputStream overload and upserting after the index has
 * been closed.
 */
@Test
public void indexUpsert() throws Exception {
    KrillIndex ki = new KrillIndex();

    // Add new document
    FieldDocument fd = new FieldDocument();
    fd.addString("textSigle", "AAA/BBB/001");
    fd.addString("content", "Example1");
    ki.upsertDoc(fd);
    ki.commit();
    MetaFields mfs = ki.getFields("AAA/BBB/001");
    // JUnit convention: expected value first, actual value second.
    assertEquals(10, mfs.getFieldValue("indexCreationDate").length());
    assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}"));
    // For a freshly created document both dates must agree.
    assertEquals(mfs.getFieldValue("indexCreationDate"), mfs.getFieldValue("indexLastModified"));
    assertEquals("Example1", mfs.getFieldValue("content"));

    // Add a second, different document
    fd = new FieldDocument();
    fd.addString("textSigle", "AAA/BBB/002");
    fd.addString("content", "Example2");
    ki.upsertDoc(fd);
    ki.commit();
    mfs = ki.getFields("AAA/BBB/002");
    assertEquals(10, mfs.getFieldValue("indexCreationDate").length());
    assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}"));
    assertEquals("Example2", mfs.getFieldValue("content"));

    // Upsert the first text sigle again: content changes, count stays at 2
    fd = new FieldDocument();
    fd.addString("textSigle", "AAA/BBB/001");
    fd.addString("content", "Example3");
    ki.upsertDoc(fd);
    ki.commit();
    mfs = ki.getFields("AAA/BBB/001");
    assertEquals(10, mfs.getFieldValue("indexCreationDate").length());
    assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}"));
    assertEquals("Example3", mfs.getFieldValue("content"));
    assertEquals(2, ki.numberOf("documents"));

    // Test Inputstream method
    // NOTE(review): the boolean flag presumably marks the stream as gzipped
    // (the resource ends in .json.gz) - confirm against KrillIndex.upsertDoc.
    ki.upsertDoc(getClass().getResourceAsStream("/wiki/WPD17-H81-63495.json.gz"), true);
    ki.commit();
    assertEquals(3, ki.numberOf("documents"));

    // NOTE(review): the index is deliberately used again after close();
    // presumably this checks that it can be written to after closing - confirm.
    ki.close();
    fd = new FieldDocument();
    fd.addString("textSigle", "AAA/DDD/005");
    fd.addString("content", "Example4");
    ki.upsertDoc(fd);
    ki.commit();
    assertEquals(4, ki.numberOf("documents"));
}
Also used : MetaFields(de.ids_mannheim.korap.response.MetaFields) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Example 79 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestFocusIndex method fuzzingTestCompareTotal.

/**
 * Annotated fuzzing test.
 *
 * Repeatedly builds a fresh index of randomly generated annotated documents,
 * runs both queries against it and compares their total result counts.
 * A mismatch is not raised as a test failure; the failing corpus is printed
 * to {@code System.err}, and only when it is the first failure or its
 * rendering is shorter than the last one printed.
 *
 * @param chars         passed on to {@code TestSimple.annotatedFuzzyFieldDoc}
 *                      (presumably the alphabet the documents are built from)
 * @param sq1           first query to evaluate
 * @param sq2           second query; expected to yield the same total as {@code sq1}
 * @param minTextLength passed on to the document generator; decremented after
 *                      each reported failure
 * @param maxTextLength passed on to the document generator
 * @param maxDocs       exclusive upper bound for the number of documents per
 *                      run; decremented after each reported failure
 * @throws IOException    declared by the original signature
 * @throws QueryException declared by the original signature
 */
public static void fuzzingTestCompareTotal(List<String> chars, SpanQuery sq1, SpanQuery sq2, int minTextLength, int maxTextLength, int maxDocs) throws IOException, QueryException {
    Krill ks1 = new Krill(sq1);
    Krill ks2 = new Krill(sq2);
    String lastFailureConf = "";

    // Multiple runs of corpus creation and query checks
    for (int x = 0; x < 100000; x++) {
        KrillIndex ki = new KrillIndex();
        List<String> docTexts = new ArrayList<String>();

        // Draw the corpus size once per run. Re-rolling the bound on every
        // iteration (as a loop condition) skews the size towards small values.
        final int docCount = (int) (Math.random() * maxDocs);

        // Create a corpus of <= maxDocs fuzzy docs
        for (int i = 0; i < docCount; i++) {
            FieldDocument testDoc = TestSimple.annotatedFuzzyFieldDoc(chars, minTextLength, maxTextLength);
            ki.addDoc(testDoc);
            // Remember the document's "copy" field so a failure can be reproduced.
            docTexts.add(testDoc.doc.getField("copy").stringValue());
        }
        ki.commit();

        // Search and compare both queries
        Result kr1 = ks1.apply(ki);
        Result kr2 = ks2.apply(ki);
        if (kr1.getTotalResults() != kr2.getTotalResults()) {
            String failureConf = "Failure:" + docTexts.toString();
            // Try to keep the failing configuration small
            if (lastFailureConf.length() == 0 || failureConf.length() < lastFailureConf.length()) {
                System.err.println(failureConf);
                lastFailureConf = failureConf;
                // Shrink the generator limits for subsequent runs.
                minTextLength--;
                maxDocs--;
            }
        }
    }
}
Also used : Krill(de.ids_mannheim.korap.Krill) ArrayList(java.util.ArrayList) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result)

Example 80 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestFocusIndex method testFocusSorting.

/**
 * Checks match order and snippets for an element query, for class queries
 * nested within that element, and finally for a focus query over class 1
 * whose sorted flag has been switched off.
 */
@Test
public void testFocusSorting() throws IOException {
    final short limit = 10;

    ki = new KrillIndex();
    ki.addDoc(createFieldDoc());
    ki.commit();

    // Plain element query: two overlapping <x> spans.
    SpanElementQuery xElement = new SpanElementQuery("tokens", "x");
    assertEquals("<tokens:x />", xElement.toString());
    kr = ki.search(xElement, limit);
    assertEquals("[[abc]]d", kr.getMatch(0).getSnippetBrackets());
    assertEquals("a[[bcd]]", kr.getMatch(1).getSnippetBrackets());
    assertEquals(2, kr.getTotalResults());

    // Both term queries are wrapped in class number 1.
    SpanQuery bTerm = new SpanTermQuery(new Term("tokens", "s:b"));
    SpanQuery bClass = new SpanClassQuery(bTerm, (byte) 1);
    SpanQuery cTerm = new SpanTermQuery(new Term("tokens", "s:c"));
    SpanQuery cClass = new SpanClassQuery(cTerm, (byte) 1);

    // <x> containing the classed "b".
    SpanQuery inElement = new SpanWithinQuery(xElement, bClass);
    kr = ki.search(inElement, limit);
    assertEquals("[[a{1:b}c]]d", kr.getMatch(0).getSnippetBrackets());
    assertEquals("a[[{1:b}cd]]", kr.getMatch(1).getSnippetBrackets());
    assertEquals(2, kr.getTotalResults());

    // <x> containing either the classed "b" or the classed "c".
    SpanQuery bOrC = new SpanOrQuery(bClass, cClass);
    inElement = new SpanWithinQuery(xElement, bOrC);
    kr = ki.search(inElement, limit);
    assertEquals("[[a{1:b}c]]d", kr.getMatch(0).getSnippetBrackets());
    assertEquals("[[ab{1:c}]]d", kr.getMatch(1).getSnippetBrackets());
    assertEquals("a[[{1:b}cd]]", kr.getMatch(2).getSnippetBrackets());
    assertEquals("a[[b{1:c}d]]", kr.getMatch(3).getSnippetBrackets());
    assertEquals(4, kr.getTotalResults());

    // Focus on class 1 with the sorted flag set to false.
    SpanFocusQuery focusOnClass = new SpanFocusQuery(inElement, (byte) 1);
    focusOnClass.setSorted(false);
    kr = ki.search(focusOnClass, limit);
    assertEquals("focus(1: spanContain(<tokens:x />, spanOr([{1: tokens:s:b}, {1: tokens:s:c}])),sorting)", focusOnClass.toString());
    assertEquals("a[[{1:b}]]cd", kr.getMatch(0).getSnippetBrackets());
    assertEquals("a[[{1:b}]]cd", kr.getMatch(1).getSnippetBrackets());
    assertEquals("ab[[{1:c}]]d", kr.getMatch(2).getSnippetBrackets());
    assertEquals("ab[[{1:c}]]d", kr.getMatch(3).getSnippetBrackets());
    assertEquals(4, kr.getTotalResults());

    testFocusSortingOverWindowSize(xElement, bClass, cClass);
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanFocusQuery(de.ids_mannheim.korap.query.SpanFocusQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Aggregations

KrillIndex (de.ids_mannheim.korap.KrillIndex) 321, Test (org.junit.Test) 310, Result (de.ids_mannheim.korap.response.Result) 143, SpanQuery (org.apache.lucene.search.spans.SpanQuery) 132, Term (org.apache.lucene.index.Term) 93, SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery) 84, Krill (de.ids_mannheim.korap.Krill) 82, QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder) 56, SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery) 42, KrillCollection (de.ids_mannheim.korap.KrillCollection) 39, TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString) 38, SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery) 37, Match (de.ids_mannheim.korap.response.Match) 37, FieldDocument (de.ids_mannheim.korap.index.FieldDocument) 33, JsonNode (com.fasterxml.jackson.databind.JsonNode) 28, DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint) 27, SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper) 26, SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery) 25, SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery) 20, SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery) 18