use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
the class TestMetaFields method searchCollectionFields.
/**
 * Indexes two small documents with differing metadata and checks how many
 * of them each of four collection definitions (loaded from JSON-LD test
 * resources) selects. The last definition is additionally run as a full
 * search and its serialized total result count is checked.
 */
@Test
public void searchCollectionFields() throws IOException {
    // Both documents carry the same three-token term vector.
    final String tokenVector = "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
            + "[(1-2)s:b|i:b|_1#1-2]"
            + "[(2-3)s:c|i:c|_2#2-3]";

    KrillIndex index = new KrillIndex();

    // First document: Goethe, text classes "reisen wissenschaft".
    FieldDocument goetheDoc = new FieldDocument();
    goetheDoc.addString("corpusSigle", "ABC");
    goetheDoc.addString("docSigle", "ABC-123");
    goetheDoc.addString("textSigle", "ABC-123-0001");
    goetheDoc.addText("title", "Die Wahlverwandschaften");
    goetheDoc.addText("author", "Johann Wolfgang von Goethe");
    goetheDoc.addKeyword("textClass", "reisen wissenschaft");
    goetheDoc.addInt("pubDate", 20130617);
    goetheDoc.addTV("tokens", "abc", tokenVector);
    index.addDoc(goetheDoc);

    // Second document: Schiller, text classes "Reisen geschichte".
    FieldDocument schillerDoc = new FieldDocument();
    schillerDoc.addString("corpusSigle", "ABC");
    schillerDoc.addString("docSigle", "ABC-125");
    schillerDoc.addString("textSigle", "ABC-125-0001");
    schillerDoc.addText("title", "Die Glocke");
    schillerDoc.addText("author", "Schiller, Friedrich");
    schillerDoc.addKeyword("textClass", "Reisen geschichte");
    schillerDoc.addInt("pubDate", 20130203);
    schillerDoc.addTV("tokens", "abc", tokenVector);
    index.addDoc(schillerDoc);

    index.commit();

    // textClass = reisen & wissenschaft
    Krill bothClassesQuery = new Krill(getJsonString(
            getClass().getResource("/queries/collections/collection_textClass.jsonld").getFile()));
    KrillCollection bothClasses = bothClassesQuery.getCollection();
    bothClasses.setIndex(index);
    assertEquals(1, bothClasses.numberOf("documents"));

    // textClass = reisen
    Krill singleClassQuery = new Krill(getJsonString(
            getClass().getResource("/queries/collections/collection_textClass_2.jsonld").getFile()));
    KrillCollection singleClass = singleClassQuery.getCollection();
    singleClass.setIndex(index);
    assertEquals(2, singleClass.numberOf("documents"));

    // (A disabled debugging snippet that printed the analyzer's tokens for
    // the second document's "author" field was located here.)

    // author = wolfgang
    Krill lowerCaseAuthorQuery = new Krill(getJsonString(
            getClass().getResource("/queries/collections/collection_goethe.jsonld").getFile()));
    KrillCollection lowerCaseAuthor = lowerCaseAuthorQuery.getCollection();
    lowerCaseAuthor.setIndex(index);
    assertEquals(1, lowerCaseAuthor.numberOf("documents"));

    // author = Wolfgang
    Krill upperCaseAuthorQuery = new Krill(getJsonString(
            getClass().getResource("/queries/collections/collection_goethe_2.jsonld").getFile()));
    KrillCollection upperCaseAuthor = upperCaseAuthorQuery.getCollection();
    upperCaseAuthor.setIndex(index);
    assertEquals(1, upperCaseAuthor.numberOf("documents"));

    // Run the last query as a search and inspect the serialized response.
    Result searchResult = upperCaseAuthorQuery.apply(index);
    JsonNode responseRoot = new ObjectMapper().readTree(searchResult.toJsonString());
    assertEquals(1, responseRoot.at("/meta/totalResults").asInt());
}
use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
the class TestBenchmarkSpans method checkBenchmark1.
/**
 * Runs the query stored in {@code /queries/benchmark1.jsonld} 100 times
 * against the index directory configured as {@code krill.indexDir} in
 * {@code /krill.properties}, measures the elapsed time, and checks the
 * total result count of the last run.
 *
 * @throws IOException if loading the properties or opening the index fails
 */
@Test
public void checkBenchmark1() throws IOException {
    Properties prop = new Properties();
    // try-with-resources: the stream is only needed while loading the
    // properties and must not stay open for the whole benchmark.
    try (InputStream fr = new FileInputStream(
            getClass().getResource("/krill.properties").getFile())) {
        prop.load(fr);
    }

    // Get the real index
    KrillIndex ki = new KrillIndex(
            new MMapDirectory(Paths.get(prop.getProperty("krill.indexDir"))));

    // Create a container for virtual collections.
    // NOTE(review): not used by the benchmark below; kept in case the
    // constructor is relied upon — confirm before removing.
    KrillCollection kc = new KrillCollection(ki);

    // / cosmas20.json!!!
    String json = getString(getClass().getResource("/queries/benchmark1.jsonld").getFile());

    int rounds = 100;
    Result kr = new Result();

    long t1 = System.nanoTime();
    for (int i = 0; i < rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    long t2 = System.nanoTime();

    // assertEquals("TotalResults", 30751, kr.getTotalResults());
    // JUnit expects (message, expected, actual); the original had the
    // last two swapped, which produces a misleading failure message.
    assertEquals("TotalResults", 4803739, kr.getTotalResults());

    // Elapsed wall-clock time in seconds (nanoseconds / 1e9).
    double seconds = (double) (t2 - t1) / 1000000000.0;
    // System.out.println("It took " + seconds + " seconds");

    // Historical timings:
    // 100 times:
    // 43,538 sec
    // 4.874
    // 1000 times:
    // 36.613 sec
    // After refactoring
    // 100 times
    // 273.58114372 seconds
    // After intro of attributes
    // 100 times
    // 350.171506379 seconds
}
use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
the class TestBenchmarkSpans method checkBenchmarkClasses.
/**
 * Times three variants of the same query (without classes, with classes,
 * with submatch), each run 2000 times against the index directory
 * configured as {@code krill.indexDir} in {@code /krill.properties}.
 * The method only measures; it makes no assertions.
 *
 * @throws IOException if loading the properties or opening the index fails
 */
@Test
public void checkBenchmarkClasses() throws IOException {
    // [orth=Der]{1:[orth=Mann]{2:[orth=und]}}
    Properties prop = new Properties();
    // try-with-resources: close the properties stream as soon as it has
    // been read instead of leaking it for the duration of the benchmark.
    try (InputStream fr = new FileInputStream(
            getClass().getResource("/krill.properties").getFile())) {
        prop.load(fr);
    }

    // Get the real index
    KrillIndex ki = new KrillIndex(
            new MMapDirectory(Paths.get(prop.getProperty("krill.indexDir"))));

    // Create a container for virtual collections.
    // NOTE(review): not used by the benchmark below; kept in case the
    // constructor is relied upon — confirm before removing.
    KrillCollection kc = new KrillCollection(ki);

    int rounds = 2000;
    Result kr = new Result();
    long t1;
    long t2;

    // Without classes
    String json = getString(getClass().getResource("/queries/benchmark5-ohne.jsonld").getFile());
    t1 = System.nanoTime();
    for (int i = 0; i < rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    t2 = System.nanoTime();
    // Elapsed wall-clock time in seconds (nanoseconds / 1e9).
    double seconds = (double) (t2 - t1) / 1000000000.0;
    // System.out.println("It took " + seconds + " seconds without classes");

    // With classes
    json = getString(getClass().getResource("/queries/benchmark5.jsonld").getFile());
    t1 = System.nanoTime();
    for (int i = 0; i < rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    t2 = System.nanoTime();
    seconds = (double) (t2 - t1) / 1000000000.0;
    // System.out.println("It took " + seconds + " seconds with classes");

    // With submatch
    json = getString(getClass().getResource("/queries/benchmark5-submatch.jsonld").getFile());
    t1 = System.nanoTime();
    for (int i = 0; i < rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    t2 = System.nanoTime();
    seconds = (double) (t2 - t1) / 1000000000.0;
    // System.out.println("It took " + seconds + " seconds with submatches");

    // NOTE(review): the original author marked this point with
    // "HERE IS A BUG!" (after the submatch run). The nature of the bug is
    // not recorded; the disabled dumps below were presumably used to
    // inspect it.
    // System.err.println(kr.toJsonString());
    // System.err.println(kr.toJSON());
    // System.err.println(kr.getMatch(3).getSnippetBrackets());

    // Historical timings, 2000 rounds:
    // It took 10.872934435 seconds without classes
    // It took 22.581117396 seconds with classes
    // It took 10.703933598 seconds without classes
    // It took 19.354674517 seconds with classes
    // It took 10.939948726 seconds without classes
    // It took 16.998470662 seconds with classes
    // It took 10.900975837 seconds without classes
    // It took 14.902590949 seconds with classes
    // It took 10.365989238 seconds without classes
    // It took 13.833405885 seconds with classes
    // It took 15.368675425 seconds without classes
    // It took 18.347603186 seconds with classes
    // It took 15.941057294 seconds with submatches
    // It took 15.241253549 seconds without classes
    // It took 17.30375624 seconds with classes
    // It took 15.367171254 seconds with submatches
}
use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
the class TestBenchmarkSpans method checkBenchmark3.
/**
 * Runs the query stored in {@code /queries/benchmark3.jsonld} 500 times
 * against the index directory configured as {@code krill.indexDir} in
 * {@code /krill.properties}, checks the total result count of the last
 * run, and prints the elapsed time to standard output.
 *
 * @throws IOException if loading the properties or opening the index fails
 */
@Test
public void checkBenchmark3() throws IOException {
    Properties prop = new Properties();
    // try-with-resources: the stream is only needed while loading the
    // properties and must not stay open for the whole benchmark.
    try (InputStream fr = new FileInputStream(
            getClass().getResource("/krill.properties").getFile())) {
        prop.load(fr);
    }

    // Get the real index
    KrillIndex ki = new KrillIndex(
            new MMapDirectory(Paths.get(prop.getProperty("krill.indexDir"))));

    // Create a container for virtual collections.
    // NOTE(review): not used by the benchmark below; kept in case the
    // constructor is relied upon — confirm before removing.
    KrillCollection kc = new KrillCollection(ki);

    // / cosmas20.json!!!
    String json = getString(getClass().getResource("/queries/benchmark3.jsonld").getFile());

    int rounds = 500;
    Result kr = new Result();

    long t1 = System.nanoTime();
    for (int i = 0; i < rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    long t2 = System.nanoTime();

    // JUnit expects (message, expected, actual); the original had the
    // last two swapped, which produces a misleading failure message.
    assertEquals("TotalResults", 70229, kr.getTotalResults());
    // System.err.println(kr.toJSON());

    // Elapsed wall-clock time in seconds (nanoseconds / 1e9).
    double seconds = (double) (t2 - t1) / 1000000000.0;
    System.out.println("It took " + seconds + " seconds");

    // Historical timing, 500 times:
    // 71.715862716 seconds
}
use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
the class TestKrillCollectionJSON method metaQuery9.
/**
 * Builds a {@code KrillCollection} from the JSON-LD definition in
 * {@code /queries/metaquery9.jsonld} and checks its string form.
 */
@Test
public void metaQuery9() {
    // Resolve the test resource and read its JSON content.
    String resourcePath = getClass().getResource("/queries/metaquery9.jsonld").getFile();
    String collectionJson = getJsonString(resourcePath);

    KrillCollection collection = new KrillCollection(collectionJson);

    // Earlier assertions on getCount()/getFilter(0) were disabled in the
    // original and are not exercised here; only the string form is checked.
    String rendered = collection.toString();
    assertEquals("corpusID:WPD", rendered);
}
Aggregations