Use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
From the class TestKrillCollectionIndex, method testIndexWithTextStringQueries:
@Test
public void testIndexWithTextStringQueries() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.commit();

    CollectionBuilder cb = new CollectionBuilder();
    KrillCollection kcn = new KrillCollection(ki);

    // Single term on the "text" field
    kcn.fromBuilder(cb.term("text", "mann"));
    assertEquals(1, kcn.docCount());

    // A full sentence passed to term() still matches the document
    kcn.fromBuilder(cb.term("text", "Der alte Mann ging über die Straße"));
    assertEquals(1, kcn.docCount());

    // Phrase constraint via text(); note the lowercased query string
    kcn.fromBuilder(cb.text("text", "Der alte Mann"));
    assertEquals(kcn.toString(), "QueryWrapperFilter(text:\"der alte mann\")");
    assertEquals(1, kcn.docCount());
}
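To isolate the pattern outside a test, here is a minimal sketch (not from the Krill sources) that wraps the same calls in a helper method; the method name countByTextPhrase is hypothetical, and it assumes a KrillIndex that has already been populated and committed as above.

    // Hypothetical helper, assembled only from the calls shown in the test above.
    public long countByTextPhrase(KrillIndex index, String phrase) {
        CollectionBuilder cb = new CollectionBuilder();
        KrillCollection kc = new KrillCollection(index);
        // text() builds a phrase constraint; term() would match a single term.
        kc.fromBuilder(cb.text("text", phrase));
        return kc.docCount();
    }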
Use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
From the class TestKrillCollectionIndex, method filterExample2Legacy:
@Test
public void filterExample2Legacy() throws Exception {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    ki.addDoc(getClass().getResourceAsStream("/wiki/00012-fakemeta.json.gz"), true);
    ki.commit();

    /*
    CollectionBuilderLegacy kf = new CollectionBuilderLegacy();

    // Create Virtual collections:
    KrillCollectionLegacy kc = new KrillCollectionLegacy(ki);
    kc.filter(kf.and("textClass", "reisen").and("textClass",
            "freizeit-unterhaltung"));
    */

    KrillCollection kc = new KrillCollection(ki);
    CollectionBuilder cb = kc.build();
    kc.filter(cb.andGroup()
            .with(cb.term("textClass", "reisen"))
            .with(cb.term("textClass", "freizeit-unterhaltung")));

    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Create a query
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc)
      .getMeta()
      .setStartIndex(0)
      .setCount((short) 20)
      .setContext(new SearchContext(true, (short) 5, true, (short) 5));

    Result kr = ks.apply(ki);
    assertEquals(kr.getTotalResults(), 369);

    // kc.filter(kf.and("corpusID", "QQQ"));
    kc.filter(cb.term("corpusID", "QQQ"));
    assertEquals("Documents", 0, kc.numberOf("documents"));
    assertEquals("Tokens", 0, kc.numberOf("tokens"));
    assertEquals("Sentences", 0, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 0, kc.numberOf("paragraphs"));

    ks.setCollection(kc);

    // Run the query again against the now empty collection
    kr = ks.apply(ki);
    /*
    kr = ki.search(kc, query, 0, (short) 20, true, (short) 5, true,
            (short) 5);
    */
    assertEquals(kr.getTotalResults(), 0);
}
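Stripped of the assertions and the commented-out legacy code, the filtering path of this test reduces to the following sketch; it assumes an index ki that has already been populated and committed, and it reuses only calls that appear above.

    // Sketch: restrict a virtual collection to documents tagged with both
    // textClass values, then run a span query against that collection only.
    KrillCollection kc = new KrillCollection(ki);
    CollectionBuilder cb = kc.build();
    kc.filter(cb.andGroup()
            .with(cb.term("textClass", "reisen"))
            .with(cb.term("textClass", "freizeit-unterhaltung")));

    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc);
    Result kr = ks.apply(ki);   // 369 matches against the test fixture above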
Use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
From the class TestKrillCollectionIndex, method testIndexWithMultipleCommitsAndDeletes:
@Test
public void testIndexWithMultipleCommitsAndDeletes() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.commit();

    CollectionBuilder cb = new CollectionBuilder();
    KrillCollection kcn = new KrillCollection(ki);

    kcn.fromBuilder(cb.term("author", "Frank"));
    assertEquals(1, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Peter"));
    assertEquals(1, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Sebastian"));
    assertEquals(0, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Michael").not());
    assertEquals(2, kcn.docCount());

    // Add Sebastian's doc
    ki.addDoc(createDoc3());
    ki.commit();

    kcn.fromBuilder(cb.term("author", "Frank"));
    assertEquals(1, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Peter"));
    assertEquals(1, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Sebastian"));
    assertEquals(1, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Michael").not());
    assertEquals(3, kcn.docCount());

    // Remove one document
    ki.delDocs("author", "Peter");
    ki.commit();

    kcn.fromBuilder(cb.term("author", "Frank"));
    assertEquals(1, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Peter"));
    assertEquals(0, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Sebastian"));
    assertEquals(1, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Michael").not());
    assertEquals(2, kcn.docCount());

    // Re-add Peter's doc
    ki.addDoc(createDoc2());
    ki.commit();

    kcn.fromBuilder(cb.term("author", "Frank"));
    assertEquals(1, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Peter"));
    assertEquals(1, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Sebastian"));
    assertEquals(1, kcn.docCount());
    kcn.fromBuilder(cb.term("author", "Michael").not());
    assertEquals(3, kcn.docCount());
}
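The negated-term constraint used throughout this test reads as "all live documents whose author is not the given value"; as the assertions show, documents removed with delDocs() leave the count once the deletion is committed. A minimal sketch, assuming the same index ki and the helper-created documents from the test:

    // Sketch: count all documents not authored by "Michael".
    CollectionBuilder cb = new CollectionBuilder();
    KrillCollection kc = new KrillCollection(ki);
    kc.fromBuilder(cb.term("author", "Michael").not());
    long others = kc.docCount();   // tracks additions and committed deletions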
Use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
From the class Resource, method find:
// PUT: Return corpus info for virtual corpus
/**
 * Find matches in the Lucene index based on UIDs and return one
 * match per doc.
 *
 * @param json
 */
@POST
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.APPLICATION_JSON)
public String find(String json, @Context UriInfo uri) {
    final Response kresp = _initResponse();
    if (kresp.hasErrors())
        return kresp.toJsonString();

    // Search index
    final Krill ks = new Krill(json);

    // Get query parameters
    final MultivaluedMap<String, String> qp = uri.getQueryParameters();
    if (qp.get("uid") == null) {
        kresp.addError(610, "Missing request parameters", "No unique IDs were given");
        return kresp.toJsonString();
    }

    // Build the collection based on a list of UIDs
    final List<String> uids = qp.get("uid");

    // TODO: RESTRICT COLLECTION TO ONLY RESPECT SELF DOCS (REPLICATION)
    // Ignore a collection that may already be established
    final KrillCollection kc = new KrillCollection();
    kc.filterUIDs(uids.toArray(new String[uids.size()]));
    ks.setCollection(kc);

    // Only return the first match per text
    ks.getMeta().setItemsPerResource(1);
    return ks.apply(Node.getIndex()).toJsonString();
}
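Outside the JAX-RS resource, the UID restriction itself is a short sequence of calls; a sketch assuming a serialized request json, a populated KrillIndex index, and placeholder UID values:

    // Sketch: restrict a request to a fixed set of unique IDs and return at
    // most one match per document (the UIDs here are placeholders).
    Krill ks = new Krill(json);
    KrillCollection kc = new KrillCollection();
    kc.filterUIDs(new String[] { "2", "3", "5" });
    ks.setCollection(kc);
    ks.getMeta().setItemsPerResource(1);
    String response = ks.apply(index).toJsonString();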
Use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
From the class TestKrill, method searchJSONmultitermRewriteBug:
@Test
public void searchJSONmultitermRewriteBug() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    assertEquals(ki.numberOf("documents"), 0);

    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
    ki.commit();
    assertEquals(ki.numberOf("documents"), 1);
    assertEquals("BZK", fd.getCorpusSigle());

    // [tt/p="A.*"]{0,3}[tt/p="N.*"]
    String json = getJsonString(getClass().getResource("/queries/bugs/multiterm_rewrite.jsonld").getFile());
    Krill ks = new Krill(json);
    KrillCollection kc = ks.getCollection();

    // No index was set
    assertEquals(-1, kc.numberOf("documents"));
    kc.setIndex(ki);

    // Index was set, but the virtual corpus is restricted to WPD
    assertEquals(0, kc.numberOf("documents"));

    /*
    kc.extend(new CollectionBuilder().or("corpusSigle", "BZK"));
    */
    CollectionBuilder cb = new CollectionBuilder();
    kc.fromBuilder(cb.orGroup().with(kc.getBuilder()).with(cb.term("corpusSigle", "BZK")));
    ks.setCollection(kc);
    assertEquals(1, kc.numberOf("documents"));

    Result kr = ks.apply(ki);
    assertEquals(kr.getSerialQuery(),
            "spanOr([SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/), "
            + "spanNext(spanRepetition(SpanMultiTermQueryWrapper"
            + "(tokens:/tt/p:A.*/){1,3}), "
            + "SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/))])");
    assertEquals(kr.getTotalResults(), 58);
    assertEquals(0, kr.getStartIndex());
    assertEquals(kr.getMatch(0).getSnippetBrackets(),
            "[[Saragat-Partei]] zerfällt Rom (ADN) die von dem ...");
    assertEquals(kr.getMatch(1).getSnippetBrackets(),
            "[[Saragat-Partei]] zerfällt Rom (ADN) die von dem ...");
    assertEquals(kr.getMatch(2).getSnippetBrackets(),
            "Saragat-Partei zerfällt [[Rom]] (ADN) "
            + "die von dem Rechtssozialisten Saragat ...");
    assertEquals(kr.getMatch(3).getSnippetBrackets(),
            "Saragat-Partei zerfällt Rom ([[ADN]]) "
            + "die von dem Rechtssozialisten Saragat geführte ...");
    assertEquals(kr.getMatch(23).getSnippetBrackets(),
            "... dem Namen \"Einheitsbewegung der sozialistischen "
            + "Initiative\" [[eine neue politische Gruppierung]] "
            + "ins Leben gerufen hatten. Pressemeldungen zufolge ...");
}
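The collection rewrite at the core of this test, widening a deserialized virtual corpus so that an additional corpusSigle is admitted, can be isolated as follows; the sketch assumes ks and ki are set up as in the test above.

    // Sketch: OR the constraint shipped with the JSON request together with an
    // extra corpusSigle, so documents from "BZK" are included as well.
    KrillCollection kc = ks.getCollection();
    kc.setIndex(ki);
    CollectionBuilder cb = new CollectionBuilder();
    kc.fromBuilder(cb.orGroup()
            .with(kc.getBuilder())                 // constraint from the request
            .with(cb.term("corpusSigle", "BZK")));
    ks.setCollection(kc);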