Search in sources :

Example 31 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestKrillCollectionIndex method testIndexWithTextStringQueries.

/**
 * Indexes a single document and checks that term and text queries on the
 * "text" field each select exactly that one document.
 *
 * @throws IOException
 *             propagated from the index and collection calls below
 */
@Test
public void testIndexWithTextStringQueries() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.commit();
    CollectionBuilder cb = new CollectionBuilder();
    KrillCollection kcn = new KrillCollection(ki);
    // Single-word term query
    kcn.fromBuilder(cb.term("text", "mann"));
    assertEquals(1, kcn.docCount());
    // Whole-sentence term query.
    // NOTE(review): the double space after "alte" is part of the original
    // literal - confirm it is intentional.
    kcn.fromBuilder(cb.term("text", "Der alte  Mann ging über die Straße"));
    assertEquals(1, kcn.docCount());
    // Text (phrase) query; the expected serialization is lower-cased
    kcn.fromBuilder(cb.text("text", "Der alte Mann"));
    // JUnit convention: expected value first, actual value second
    assertEquals("QueryWrapperFilter(text:\"der alte mann\")", kcn.toString());
    assertEquals(1, kcn.docCount());
}
Also used : CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Example 32 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestKrillCollectionIndex method filterExample2Legacy.

/**
 * Builds an index from the wiki sample documents, filters a collection by
 * two "textClass" terms and checks the document/token/sentence/paragraph
 * counts and the total results of a token query. Then calls
 * {@code filter} again with corpusID "QQQ" and checks that every count and
 * the total results are 0.
 *
 * @throws Exception
 *             propagated from indexing, counting or searching
 */
@Test
public void filterExample2Legacy() throws Exception {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();
    // One more document is added in a second, separate commit
    ki.addDoc(getClass().getResourceAsStream("/wiki/00012-fakemeta.json.gz"), true);
    ki.commit();
    /*
        Legacy API equivalent, kept for reference:

        CollectionBuilderLegacy kf = new CollectionBuilderLegacy();

        // Create Virtual collections:
        KrillCollectionLegacy kc = new KrillCollectionLegacy(ki);
        kc.filter(kf.and("textClass", "reisen").and("textClass",
                "freizeit-unterhaltung"));
        */
    KrillCollection kc = new KrillCollection(ki);
    CollectionBuilder cb = kc.build();
    kc.filter(cb.andGroup().with(cb.term("textClass", "reisen")).with(cb.term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));
    // Create a query
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20).setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    // JUnit convention: expected value first, actual value second
    assertEquals(369, kr.getTotalResults());
    // Legacy API equivalent: kc.filter(kf.and("corpusID", "QQQ"));
    kc.filter(cb.term("corpusID", "QQQ"));
    assertEquals("Documents", 0, kc.numberOf("documents"));
    assertEquals("Tokens", 0, kc.numberOf("tokens"));
    assertEquals("Sentences", 0, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 0, kc.numberOf("paragraphs"));
    ks.setCollection(kc);
    // Run the same query against the filtered collection
    kr = ks.apply(ki);
    /*
        Legacy API equivalent, kept for reference:

        kr = ki.search(kc, query, 0, (short) 20, true, (short) 5, true,
                (short) 5);
        */
    assertEquals(0, kr.getTotalResults());
}
Also used : Krill(de.ids_mannheim.korap.Krill) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) SearchContext(de.ids_mannheim.korap.response.SearchContext) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 33 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestKrillCollectionIndex method testIndexWithMultipleCommitsAndDeletes.

/**
 * Checks per-author document counts of a collection across a sequence of
 * index changes: initial commit of two documents, adding a third document,
 * deleting the documents of author "Peter", and adding the second document
 * again.
 *
 * @throws IOException
 *             propagated from the index and collection calls below
 */
@Test
public void testIndexWithMultipleCommitsAndDeletes() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.commit();

    final CollectionBuilder builder = new CollectionBuilder();
    final KrillCollection collection = new KrillCollection(ki);

    // Initial state: two documents indexed
    assertAuthorDocCounts(collection, builder, 1, 1, 0, 2);

    // Add Sebastians doc
    ki.addDoc(createDoc3());
    ki.commit();
    assertAuthorDocCounts(collection, builder, 1, 1, 1, 3);

    // Remove one document
    ki.delDocs("author", "Peter");
    ki.commit();
    assertAuthorDocCounts(collection, builder, 1, 0, 1, 2);

    // Readd Peter's doc
    ki.addDoc(createDoc2());
    ki.commit();
    assertAuthorDocCounts(collection, builder, 1, 1, 1, 3);
}

/**
 * Rebuilds the collection from four author queries in turn (Frank, Peter,
 * Sebastian, then "not Michael") and asserts the document count after
 * each one.
 *
 * @param collection
 *            the collection to rebuild and count
 * @param builder
 *            the builder used to create the author terms
 * @param frank
 *            expected count for author "Frank"
 * @param peter
 *            expected count for author "Peter"
 * @param sebastian
 *            expected count for author "Sebastian"
 * @param notMichael
 *            expected count for the negated author "Michael" term
 * @throws IOException
 *             propagated from the collection calls
 */
private void assertAuthorDocCounts(KrillCollection collection, CollectionBuilder builder, int frank, int peter, int sebastian, int notMichael) throws IOException {
    collection.fromBuilder(builder.term("author", "Frank"));
    assertEquals(frank, collection.docCount());

    collection.fromBuilder(builder.term("author", "Peter"));
    assertEquals(peter, collection.docCount());

    collection.fromBuilder(builder.term("author", "Sebastian"));
    assertEquals(sebastian, collection.docCount());

    collection.fromBuilder(builder.term("author", "Michael").not());
    assertEquals(notMichael, collection.docCount());
}
Also used : CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Example 34 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class Resource method find.

// PUT: Return corpus info for virtual corpus
/**
 * Find matches in the lucene index based on UIDs and return one
 * match per doc.
 *
 * @param json
 *            the request body, passed to the {@link Krill} constructor
 * @param uri
 *            request URI information; the values of its {@code uid} query
 *            parameter are used to filter the collection
 * @return the JSON string of the initial response if it already has
 *         errors, of an error response (code 610) if no {@code uid}
 *         query parameter is present, or otherwise of the search result
 */
@POST
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.APPLICATION_JSON)
public String find(String json, @Context UriInfo uri) {
    final Response kresp = _initResponse();
    if (kresp.hasErrors()) {
        return kresp.toJsonString();
    }
    // Search index
    final Krill ks = new Krill(json);
    // Get the list of uids from the query parameters (single lookup)
    final List<String> uids = uri.getQueryParameters().get("uid");
    if (uids == null) {
        kresp.addError(610, "Missing request parameters", "No unique IDs were given");
        return kresp.toJsonString();
    }
    // TODO: RESTRICT COLLECTION TO ONLY RESPECT SELF DOCS (REPLICATION)
    // Build Collection based on the list of uids.
    // Ignore a Collection that may already be established
    final KrillCollection kc = new KrillCollection();
    kc.filterUIDs(uids.toArray(new String[uids.size()]));
    ks.setCollection(kc);
    // Only return the first match per text
    ks.getMeta().setItemsPerResource(1);
    return ks.apply(Node.getIndex()).toJsonString();
}
Also used : Response(de.ids_mannheim.korap.response.Response) Krill(de.ids_mannheim.korap.Krill) KrillCollection(de.ids_mannheim.korap.KrillCollection) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces) Consumes(javax.ws.rs.Consumes)

Example 35 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestKrill method searchJSONmultitermRewriteBug.

/**
 * Indexes one BZK document, loads the "multiterm_rewrite" JSON query,
 * widens its collection with an OR on corpusSigle "BZK", and checks the
 * serialized query, the total results, the start index and several
 * match snippets.
 *
 * @throws IOException
 *             propagated from indexing or searching
 */
@Test
public void searchJSONmultitermRewriteBug() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // All assertions below use the JUnit order: expected first, actual second
    assertEquals(0, ki.numberOf("documents"));
    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
    ki.commit();
    assertEquals(1, ki.numberOf("documents"));
    assertEquals("BZK", fd.getCorpusSigle());
    // [tt/p="A.*"]{0,3}[tt/p="N.*"]
    String json = getJsonString(getClass().getResource("/queries/bugs/multiterm_rewrite.jsonld").getFile());
    Krill ks = new Krill(json);
    KrillCollection kc = ks.getCollection();
    // No index was set
    assertEquals(-1, kc.numberOf("documents"));
    kc.setIndex(ki);
    // Index was set but vc restricted to WPD
    assertEquals(0, kc.numberOf("documents"));
    /*
        Legacy API equivalent, kept for reference:

        kc.extend(new CollectionBuilder().or("corpusSigle", "BZK"));
        */
    CollectionBuilder cb = new CollectionBuilder();
    kc.fromBuilder(cb.orGroup().with(kc.getBuilder()).with(cb.term("corpusSigle", "BZK")));
    ks.setCollection(kc);
    assertEquals(1, kc.numberOf("documents"));
    Result kr = ks.apply(ki);
    assertEquals("spanOr([SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/), " + "spanNext(spanRepetition(SpanMultiTermQueryWrapper" + "(tokens:/tt/p:A.*/){1,3}), " + "SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/))])", kr.getSerialQuery());
    assertEquals(58, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals("[[Saragat-Partei]] zerfällt Rom (ADN) die von dem ...", kr.getMatch(0).getSnippetBrackets());
    assertEquals("[[Saragat-Partei]] zerfällt Rom (ADN) die von dem ...", kr.getMatch(1).getSnippetBrackets());
    assertEquals("Saragat-Partei zerfällt [[Rom]] (ADN) " + "die von dem Rechtssozialisten Saragat ...", kr.getMatch(2).getSnippetBrackets());
    assertEquals("Saragat-Partei zerfällt Rom ([[ADN]]) " + "die von dem Rechtssozialisten Saragat geführte ...", kr.getMatch(3).getSnippetBrackets());
    assertEquals("... dem Namen \"Einheitsbewegung der sozialistischen " + "Initiative\" [[eine neue politische Gruppierung]] " + "ins Leben gerufen hatten. Pressemeldungen zufolge ...", kr.getMatch(23).getSnippetBrackets());
}
Also used : Krill(de.ids_mannheim.korap.Krill) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

KrillCollection (de.ids_mannheim.korap.KrillCollection): 37; Test (org.junit.Test): 35; KrillIndex (de.ids_mannheim.korap.KrillIndex): 24; Krill (de.ids_mannheim.korap.Krill): 17; Result (de.ids_mannheim.korap.response.Result): 15; CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder): 11; SearchContext (de.ids_mannheim.korap.response.SearchContext): 5; MMapDirectory (org.apache.lucene.store.MMapDirectory): 5; TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 4; FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 4; QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 3; Term (org.apache.lucene.index.Term): 2; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 2; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 2; JsonNode (com.fasterxml.jackson.databind.JsonNode): 1; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1; SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 1; SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery): 1; SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 1; SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery): 1