Use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
From the class TestFieldDocument, method indexArbitraryMetaDataSorted.
/**
 * Indexes a single document and requests two of its metadata fields
 * ("titel", then "datum") by name.
 *
 * Verifies that exactly these two fields are serialized, in the
 * requested order, with the expected type, key and value, and that
 * there is no third entry.
 */
@Test
public void indexArbitraryMetaDataSorted() throws Exception {
    String json = createDocString1();

    KrillIndex ki = new KrillIndex();
    // The returned FieldDocument is not needed; only the index content is checked
    ki.addDoc(json);
    ki.commit();

    // Names of the fields to retrieve, in the order they are expected back
    ArrayList<String> hs = new ArrayList<String>();
    hs.add("titel");
    hs.add("datum");

    JsonNode res = ki.getFields("aa/bb/cc", hs).toJsonNode();

    // First requested field
    assertEquals("type:text", res.at("/document/fields/0/type").asText());
    assertEquals("titel", res.at("/document/fields/0/key").asText());
    assertEquals("Der alte Baum", res.at("/document/fields/0/value").asText());

    // Second requested field
    assertEquals("type:date", res.at("/document/fields/1/type").asText());
    assertEquals("datum", res.at("/document/fields/1/key").asText());
    assertEquals("2018-04-03", res.at("/document/fields/1/value").asText());

    // Nothing beyond the two requested fields
    assertTrue(res.at("/document/fields/2").isMissingNode());
}
Use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
From the class TestFieldDocument, method indexExample2.
/**
 * Indexes a small JSON document (primary data "abc" with three tokens
 * and several metadata fields) and checks the metadata twice: once on
 * the FieldDocument returned by addDoc, and once on the first match of
 * a search for the token "s:b" wrapped in class 3.
 */
@Test
public void indexExample2() throws Exception {
    String json = "{"
            + " \"fields\" : ["
            + " { "
            + " \"primaryData\" : \"abc\""
            + " },"
            + " {"
            + " \"name\" : \"tokens\","
            + " \"data\" : ["
            + " [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"],"
            + " [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ],"
            + " [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]"
            + " ]"
            + " }"
            + " ],"
            + " \"corpusID\" : \"WPD\","
            + " \"ID\" : \"WPD-AAA-00001\","
            + " \"textClass\" : \"music entertainment\","
            + " \"author\" : \"Peter Frankenfeld\","
            + " \"pubDate\" : 20130617,"
            + " \"title\" : \"Wikipedia\","
            + " \"subTitle\" : \"Die freie Enzyklopädie\","
            + " \"pubPlace\" : \"Bochum\""
            + "}";

    KrillIndex ki = new KrillIndex();
    FieldDocument fd = ki.addDoc(json);
    ki.commit();

    // Metadata as seen on the document handed back by addDoc
    assertEquals("abc", fd.getPrimaryData());
    assertEquals("WPD", fd.getCorpusID());
    assertEquals("WPD-AAA-00001", fd.getID());
    assertEquals("music entertainment", fd.getFieldValue("textClass"));
    assertEquals("Peter Frankenfeld", fd.getFieldValue("author"));
    assertEquals("Wikipedia", fd.getFieldValue("title"));
    assertEquals("Die freie Enzyklopädie", fd.getFieldValue("subTitle"));
    assertEquals("Bochum", fd.getFieldValue("pubPlace"));
    assertEquals("2013-06-17", fd.getFieldValueAsDate("pubDate").toDisplay());

    // Search for "s:b" wrapped in class number 3
    QueryBuilder kq = new QueryBuilder("tokens");
    Result kr = ki.search((SpanQuery) kq.seq(kq.nr(3, kq.seg("s:b"))).toQuery());
    Match km = kr.getMatch(0);

    // The same metadata must be available on the match
    assertEquals("abc", km.getPrimaryData());
    assertEquals("WPD", km.getCorpusID());
    assertEquals("WPD-AAA-00001", km.getDocID());
    assertEquals("music entertainment", km.getFieldValue("textClass"));
    assertEquals("Peter Frankenfeld", km.getFieldValue("author"));
    assertEquals("Wikipedia", km.getFieldValue("title"));
    assertEquals("Die freie Enzyklopädie", km.getFieldValue("subTitle"));
    assertEquals("Bochum", km.getFieldValue("pubPlace"));
    assertEquals("2013-06-17", km.getFieldValueAsDate("pubDate").toDisplay());

    // The match covers "b", annotated with class 3
    assertEquals("a[[{3:b}]]c", km.getSnippetBrackets());
}
Use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
From the class TestFieldDocument, method indexUpsert.
/**
 * Exercises KrillIndex.upsertDoc: two documents with distinct text
 * sigles are added, one of them is upserted again with new content,
 * a gzipped JSON resource is upserted via the InputStream variant,
 * and a further document is upserted after the index was closed.
 * Document counts and the index date fields are checked along the way.
 */
@Test
public void indexUpsert() throws Exception {
    KrillIndex ki = new KrillIndex();

    // Add a first document
    FieldDocument fd = new FieldDocument();
    fd.addString("textSigle", "AAA/BBB/001");
    fd.addString("content", "Example1");
    ki.upsertDoc(fd);
    ki.commit();

    MetaFields mfs = ki.getFields("AAA/BBB/001");
    // The creation date has the form yyyy-MM-dd (10 characters)
    assertEquals(10, mfs.getFieldValue("indexCreationDate").length());
    assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}"));
    // For a freshly added document both date fields carry the same value
    assertEquals(mfs.getFieldValue("indexCreationDate"), mfs.getFieldValue("indexLastModified"));
    assertEquals("Example1", mfs.getFieldValue("content"));

    // Add a second document under a different text sigle
    fd = new FieldDocument();
    fd.addString("textSigle", "AAA/BBB/002");
    fd.addString("content", "Example2");
    ki.upsertDoc(fd);
    ki.commit();

    mfs = ki.getFields("AAA/BBB/002");
    assertEquals(10, mfs.getFieldValue("indexCreationDate").length());
    assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}"));
    assertEquals("Example2", mfs.getFieldValue("content"));

    // Upsert a document whose text sigle already exists in the index
    fd = new FieldDocument();
    fd.addString("textSigle", "AAA/BBB/001");
    fd.addString("content", "Example3");
    ki.upsertDoc(fd);
    ki.commit();

    mfs = ki.getFields("AAA/BBB/001");
    assertEquals(10, mfs.getFieldValue("indexCreationDate").length());
    assertTrue(mfs.getFieldValue("indexCreationDate").matches("\\d{4}-\\d{2}-\\d{2}"));
    assertEquals("Example3", mfs.getFieldValue("content"));

    // The upsert replaced the existing entry, so the count is still 2
    assertEquals(2, ki.numberOf("documents"));

    // Test Inputstream method
    ki.upsertDoc(getClass().getResourceAsStream("/wiki/WPD17-H81-63495.json.gz"), true);
    ki.commit();
    assertEquals(3, ki.numberOf("documents"));

    // The index is closed here and used again below.
    // NOTE(review): presumably this checks that the index can still be
    // written to after close() - confirm against KrillIndex.
    ki.close();

    fd = new FieldDocument();
    fd.addString("textSigle", "AAA/DDD/005");
    fd.addString("content", "Example4");
    ki.upsertDoc(fd);
    ki.commit();
    assertEquals(4, ki.numberOf("documents"));
}
Use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
From the class TestFocusIndex, method fuzzingTestCompareTotal.
/**
 * Annotated fuzzing test.
 *
 * Repeatedly builds a fresh index of randomly generated annotated
 * documents, runs both queries against it and compares their total
 * result counts. Whenever the counts differ and the failing corpus is
 * shorter (as a string) than the last one reported, the corpus is
 * printed to stderr and the size limits are decreased.
 *
 * @param chars
 *            characters passed on to the document generator
 * @param sq1
 *            first query
 * @param sq2
 *            second query, whose total is compared with that of sq1
 * @param minTextLength
 *            minimum text length passed on to the document generator
 * @param maxTextLength
 *            maximum text length passed on to the document generator
 * @param maxDocs
 *            upper bound used when drawing the number of documents
 * @throws IOException
 *             declared by the index operations used here
 * @throws QueryException
 *             declared by the query operations used here
 */
public static void fuzzingTestCompareTotal(List<String> chars, SpanQuery sq1, SpanQuery sq2, int minTextLength, int maxTextLength, int maxDocs) throws IOException, QueryException {
    Krill ks1 = new Krill(sq1);
    Krill ks2 = new Krill(sq2);
    String lastFailureConf = "";

    // Multiple runs of corpus creation and query checks
    for (int x = 0; x < 100000; x++) {
        // NOTE(review): the index is never closed; confirm whether
        // closing it at the end of each run is safe and desirable.
        KrillIndex ki = new KrillIndex();
        List<String> list = new ArrayList<String>();

        // Create a corpus of <= maxDocs fuzzy docs.
        // The bound is drawn anew on every iteration of the loop.
        for (int i = 0; i < (int) (Math.random() * maxDocs); i++) {
            FieldDocument testDoc = TestSimple.annotatedFuzzyFieldDoc(chars, minTextLength, maxTextLength);
            ki.addDoc(testDoc);
            // Remember the "copy" field value for failure reporting
            String testString = testDoc.doc.getField("copy").stringValue();
            list.add(testString);
        }
        ki.commit();

        // Search and compare both queries
        Result kr1 = ks1.apply(ki);
        Result kr2 = ks2.apply(ki);
        if (kr1.getTotalResults() != kr2.getTotalResults()) {
            String failureConf = "Failure:" + list.toString();

            // Try to keep the failing configuration small
            if (lastFailureConf.length() == 0 || failureConf.length() < lastFailureConf.length()) {
                System.err.println(failureConf);
                lastFailureConf = failureConf;
                // NOTE(review): neither value is clamped, so both can
                // drop below zero over many failures - confirm intended.
                minTextLength--;
                maxDocs--;
            }
        }
    }
}
Use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
From the class TestFocusIndex, method testFocusSorting.
/**
 * Runs a series of increasingly nested queries against a single
 * indexed document and checks the bracketed snippets and result
 * totals of each: a plain element query, a within query with one
 * classed term, a within query with an OR of two classed terms, and
 * finally a focus query on class 1 with sorting switched off via
 * setSorted(false).
 */
@Test
public void testFocusSorting() throws IOException {
    final short limit = (short) 10;

    ki = new KrillIndex();
    ki.addDoc(createFieldDoc());
    ki.commit();

    // 1. Element <x> on its own
    SpanElementQuery xElement = new SpanElementQuery("tokens", "x");
    assertEquals("<tokens:x />", xElement.toString());

    kr = ki.search(xElement, limit);
    String[] elementSnippets = { "[[abc]]d", "a[[bcd]]" };
    for (int i = 0; i < elementSnippets.length; i++) {
        assertEquals(elementSnippets[i], kr.getMatch(i).getSnippetBrackets());
    }
    assertEquals(2, kr.getTotalResults());

    // Terms "b" and "c", each wrapped in class 1
    SpanQuery bTerm = new SpanTermQuery(new Term("tokens", "s:b"));
    SpanQuery cTerm = new SpanTermQuery(new Term("tokens", "s:c"));
    SpanQuery bClass = new SpanClassQuery(bTerm, (byte) 1);
    SpanQuery cClass = new SpanClassQuery(cTerm, (byte) 1);

    // 2. <x> containing the classed "b"
    SpanQuery containing = new SpanWithinQuery(xElement, bClass);
    kr = ki.search(containing, limit);
    String[] withinSnippets = { "[[a{1:b}c]]d", "a[[{1:b}cd]]" };
    for (int i = 0; i < withinSnippets.length; i++) {
        assertEquals(withinSnippets[i], kr.getMatch(i).getSnippetBrackets());
    }
    assertEquals(2, kr.getTotalResults());

    // 3. <x> containing either the classed "b" or the classed "c"
    SpanQuery eitherClass = new SpanOrQuery(bClass, cClass);
    containing = new SpanWithinQuery(xElement, eitherClass);
    kr = ki.search(containing, limit);
    String[] orSnippets = {
        "[[a{1:b}c]]d",
        "[[ab{1:c}]]d",
        "a[[{1:b}cd]]",
        "a[[b{1:c}d]]"
    };
    for (int i = 0; i < orSnippets.length; i++) {
        assertEquals(orSnippets[i], kr.getMatch(i).getSnippetBrackets());
    }
    assertEquals(4, kr.getTotalResults());

    // 4. Focus on class 1 of the previous query
    SpanFocusQuery focused = new SpanFocusQuery(containing, (byte) 1);
    focused.setSorted(false);
    kr = ki.search(focused, limit);
    assertEquals(
        "focus(1: spanContain(<tokens:x />, spanOr([{1: tokens:s:b}, {1: tokens:s:c}])),sorting)",
        focused.toString());
    String[] focusSnippets = {
        "a[[{1:b}]]cd",
        "a[[{1:b}]]cd",
        "ab[[{1:c}]]d",
        "ab[[{1:c}]]d"
    };
    for (int i = 0; i < focusSnippets.length; i++) {
        assertEquals(focusSnippets[i], kr.getMatch(i).getSnippetBrackets());
    }
    assertEquals(4, kr.getTotalResults());

    testFocusSortingOverWindowSize(xElement, bClass, cClass);
}
Aggregations