Search in sources :

Example 16 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestMetaFields method searchCollectionFields.

/**
 * Indexes two small documents and verifies that collection queries over
 * their metadata fields select the expected number of documents.
 *
 * @throws IOException declared by the original test signature; raised by
 *         the indexing / JSON parsing calls below
 */
@Test
public void searchCollectionFields() throws IOException {
    final KrillIndex index = new KrillIndex();

    // Both documents carry the very same three-token annotation layer.
    final String tokenAnnotation = "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
            + "[(1-2)s:b|i:b|_1#1-2]"
            + "[(2-3)s:c|i:c|_2#2-3]";

    // First document: authored by Goethe.
    final FieldDocument goetheDoc = new FieldDocument();
    goetheDoc.addString("corpusSigle", "ABC");
    goetheDoc.addString("docSigle", "ABC-123");
    goetheDoc.addString("textSigle", "ABC-123-0001");
    goetheDoc.addText("title", "Die Wahlverwandschaften");
    goetheDoc.addText("author", "Johann Wolfgang von Goethe");
    goetheDoc.addKeyword("textClass", "reisen wissenschaft");
    goetheDoc.addInt("pubDate", 20130617);
    goetheDoc.addTV("tokens", "abc", tokenAnnotation);
    index.addDoc(goetheDoc);

    // Second document: authored by Schiller.
    final FieldDocument schillerDoc = new FieldDocument();
    schillerDoc.addString("corpusSigle", "ABC");
    schillerDoc.addString("docSigle", "ABC-125");
    schillerDoc.addString("textSigle", "ABC-125-0001");
    schillerDoc.addText("title", "Die Glocke");
    schillerDoc.addText("author", "Schiller, Friedrich");
    schillerDoc.addKeyword("textClass", "Reisen geschichte");
    schillerDoc.addInt("pubDate", 20130203);
    schillerDoc.addTV("tokens", "abc", tokenAnnotation);
    index.addDoc(schillerDoc);

    index.commit();

    // Collection fixtures that are only checked for their document count,
    // paired index-by-index with the expected count.
    final String[] countOnlyFixtures = {
        // textClass = reisen & wissenschaft
        "/queries/collections/collection_textClass.jsonld",
        // textClass = reisen
        "/queries/collections/collection_textClass_2.jsonld",
        // author = wolfgang
        "/queries/collections/collection_goethe.jsonld"
    };
    final int[] expectedDocuments = { 1, 2, 1 };

    for (int q = 0; q < countOnlyFixtures.length; q++) {
        final String fixturePath = getClass().getResource(countOnlyFixtures[q]).getFile();
        final KrillCollection collection = new Krill(getJsonString(fixturePath)).getCollection();
        collection.setIndex(index);
        assertEquals(expectedDocuments[q], collection.numberOf("documents"));
    }

    // NOTE: a disabled debugging snippet that printed the analyzed tokens of
    // the second document's "author" field to System.err used to sit here.

    // author = Wolfgang
    final String capitalizedPath = getClass().getResource("/queries/collections/collection_goethe_2.jsonld").getFile();
    final Krill capitalizedQuery = new Krill(getJsonString(capitalizedPath));
    final KrillCollection capitalizedCollection = capitalizedQuery.getCollection();
    capitalizedCollection.setIndex(index);
    assertEquals(1, capitalizedCollection.numberOf("documents"));

    // Run the same request against the index and inspect the JSON response.
    final Result response = capitalizedQuery.apply(index);
    final JsonNode responseJson = new ObjectMapper().readTree(response.toJsonString());
    assertEquals(1, responseJson.at("/meta/totalResults").asInt());
}
Also used : Krill(de.ids_mannheim.korap.Krill) JsonNode(com.fasterxml.jackson.databind.JsonNode) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 17 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestBenchmarkSpans method checkBenchmark1.

/**
 * Runs the query stored in {@code /queries/benchmark1.jsonld} 100 times
 * against the index configured under {@code krill.indexDir} in
 * {@code /krill.properties} and checks the total result count of the last
 * run.
 *
 * @throws IOException if the properties file cannot be read, or from the
 *         index / query calls below
 */
@Test
public void checkBenchmark1() throws IOException {
    Properties prop = new Properties();
    // try-with-resources: the stream was previously never closed.
    try (InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile())) {
        prop.load(fr);
    }
    // Get the real index
    KrillIndex ki = new KrillIndex(new MMapDirectory(Paths.get(prop.getProperty("krill.indexDir"))));
    // Create a container for virtual collections.
    // NOTE(review): not used further in this test; kept because the
    // constructor's side effects are not visible from here.
    KrillCollection kc = new KrillCollection(ki);
    // Original author's note: "/ cosmas20.json!!!"
    String json = getString(getClass().getResource("/queries/benchmark1.jsonld").getFile());
    int rounds = 100;
    Result kr = new Result();
    final long t1 = System.nanoTime();
    for (int i = 1; i <= rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    final long t2 = System.nanoTime();
    // assertEquals("TotalResults", 30751, kr.getTotalResults());
    // JUnit's signature is (message, expected, actual); the arguments were
    // previously swapped, which produced a misleading failure message.
    assertEquals("TotalResults", 4803739, kr.getTotalResults());
    // Elapsed wall-clock time in seconds; only consumed by the disabled
    // print statement below.
    double seconds = (t2 - t1) / 1_000_000_000.0;
    // System.out.println("It took " + seconds + " seconds");
    // Historical measurements:
    // 100 times:
    // 43,538 sec
    // 4.874
    // 1000 times:
    // 36.613 sec
    // After refactoring
    // 100 times
    // 273.58114372 seconds
    // After intro of attributes
    // 100 times
    // 350.171506379 seconds
}
Also used : Krill(de.ids_mannheim.korap.Krill) KrillIndex(de.ids_mannheim.korap.KrillIndex) MMapDirectory(org.apache.lucene.store.MMapDirectory) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 18 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestBenchmarkSpans method checkBenchmarkClasses.

/**
 * Times three variants of the same benchmark query (without classes, with
 * classes, with submatch), each executed 2000 times against the index
 * configured under {@code krill.indexDir} in {@code /krill.properties}.
 * The test makes no assertions; it only fails if one of the calls throws.
 *
 * @throws IOException if the properties file cannot be read, or from the
 *         index / query calls below
 */
@Test
public void checkBenchmarkClasses() throws IOException {
    // [orth=Der]{1:[orth=Mann]{2:[orth=und]}}
    Properties prop = new Properties();
    // try-with-resources: the stream was previously never closed.
    try (InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile())) {
        prop.load(fr);
    }
    // Get the real index
    KrillIndex ki = new KrillIndex(new MMapDirectory(Paths.get(prop.getProperty("krill.indexDir"))));
    // Create a container for virtual collections.
    // NOTE(review): not used further in this test; kept because the
    // constructor's side effects are not visible from here.
    KrillCollection kc = new KrillCollection(ki);
    long t1, t2;
    // Elapsed seconds of the most recent variant; only consumed by the
    // disabled print statements below.
    double seconds;
    int rounds = 2000;
    Result kr = new Result();

    // Without classes
    String json = getString(getClass().getResource("/queries/benchmark5-ohne.jsonld").getFile());
    t1 = System.nanoTime();
    for (int i = 1; i <= rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    t2 = System.nanoTime();
    seconds = (t2 - t1) / 1_000_000_000.0;
    // System.out.println("It took " + seconds + " seconds without classes");

    // With classes
    json = getString(getClass().getResource("/queries/benchmark5.jsonld").getFile());
    t1 = System.nanoTime();
    for (int i = 1; i <= rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    t2 = System.nanoTime();
    seconds = (t2 - t1) / 1_000_000_000.0;
    // System.out.println("It took " + seconds + " seconds with classes");

    // With submatch
    json = getString(getClass().getResource("/queries/benchmark5-submatch.jsonld").getFile());
    t1 = System.nanoTime();
    for (int i = 1; i <= rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    t2 = System.nanoTime();
    seconds = (t2 - t1) / 1_000_000_000.0;
    // System.out.println("It took " + seconds + " seconds with submatches");

    // Original author's note: "HERE IS A BUG!"
    // NOTE(review): the note does not say what the bug is; the disabled
    // statements below look like the debugging aids used to inspect it.
    // System.err.println(kr.toJsonString());
    // System.err.println(kr.toJSON());
    // System.err.println(kr.getMatch(3).getSnippetBrackets());

    // Historical measurements, 2000 rounds:
    // It took 10.872934435 seconds without classes
    // It took 22.581117396 seconds with classes
    // It took 10.703933598 seconds without classes
    // It took 19.354674517 seconds with classes
    // It took 10.939948726 seconds without classes
    // It took 16.998470662 seconds with classes
    // It took 10.900975837 seconds without classes
    // It took 14.902590949 seconds with classes
    // It took 10.365989238 seconds without classes
    // It took 13.833405885 seconds with classes
    // It took 15.368675425 seconds without classes
    // It took 18.347603186 seconds with classes
    // It took 15.941057294 seconds with submatches
    // It took 15.241253549 seconds without classes
    // It took 17.30375624 seconds with classes
    // It took 15.367171254 seconds with submatches
}
Also used : Krill(de.ids_mannheim.korap.Krill) KrillIndex(de.ids_mannheim.korap.KrillIndex) MMapDirectory(org.apache.lucene.store.MMapDirectory) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 19 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestBenchmarkSpans method checkBenchmark3.

/**
 * Runs the query stored in {@code /queries/benchmark3.jsonld} 500 times
 * against the index configured under {@code krill.indexDir} in
 * {@code /krill.properties}, checks the total result count of the last run
 * and prints the elapsed time to standard output.
 *
 * @throws IOException if the properties file cannot be read, or from the
 *         index / query calls below
 */
@Test
public void checkBenchmark3() throws IOException {
    Properties prop = new Properties();
    // try-with-resources: the stream was previously never closed.
    try (InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile())) {
        prop.load(fr);
    }
    // Get the real index
    KrillIndex ki = new KrillIndex(new MMapDirectory(Paths.get(prop.getProperty("krill.indexDir"))));
    // Create a container for virtual collections.
    // NOTE(review): not used further in this test; kept because the
    // constructor's side effects are not visible from here.
    KrillCollection kc = new KrillCollection(ki);
    // Original author's note: "/ cosmas20.json!!!"
    String json = getString(getClass().getResource("/queries/benchmark3.jsonld").getFile());
    int rounds = 500;
    Result kr = new Result();
    final long t1 = System.nanoTime();
    for (int i = 1; i <= rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    final long t2 = System.nanoTime();
    // JUnit's signature is (message, expected, actual); the arguments were
    // previously swapped, which produced a misleading failure message.
    assertEquals("TotalResults", 70229, kr.getTotalResults());
    // System.err.println(kr.toJSON());
    // Elapsed wall-clock time converted from nanoseconds to seconds.
    double seconds = (t2 - t1) / 1_000_000_000.0;
    System.out.println("It took " + seconds + " seconds");
    // Historical measurement:
    // 500 times:
    // 71.715862716 seconds
}
Also used : Krill(de.ids_mannheim.korap.Krill) KrillIndex(de.ids_mannheim.korap.KrillIndex) MMapDirectory(org.apache.lucene.store.MMapDirectory) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 20 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestKrillCollectionJSON method metaQuery9.

/**
 * Builds a collection from the {@code /queries/metaquery9.jsonld} fixture
 * and checks its string form.
 */
@Test
public void metaQuery9() {
    final String fixturePath = getClass().getResource("/queries/metaquery9.jsonld").getFile();
    final KrillCollection collection = new KrillCollection(getJsonString(fixturePath));

    // Earlier, now disabled, checks asserted getCount() == 1 and that
    // getFilter(0).toString() equals
    // "filter with QueryWrapperFilter(+corpusID:WPD)".
    final String rendered = collection.toString();
    assertEquals("corpusID:WPD", rendered);
}
Also used : KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Aggregations

KrillCollection (de.ids_mannheim.korap.KrillCollection): 37; Test (org.junit.Test): 35; KrillIndex (de.ids_mannheim.korap.KrillIndex): 24; Krill (de.ids_mannheim.korap.Krill): 17; Result (de.ids_mannheim.korap.response.Result): 15; CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder): 11; SearchContext (de.ids_mannheim.korap.response.SearchContext): 5; MMapDirectory (org.apache.lucene.store.MMapDirectory): 5; TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 4; FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 4; QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 3; Term (org.apache.lucene.index.Term): 2; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 2; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 2; JsonNode (com.fasterxml.jackson.databind.JsonNode): 1; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1; SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 1; SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery): 1; SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 1; SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery): 1