Search in sources:

Example 26 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

The method add of the class Resource.

/*
 * Support GZip:
 * Or maybe it's already supported ...
 * http://stackoverflow.com/questions/19765582/how-to-make-jersey-use-gzip-compression-for-the-response-message-body
 */
/**
 * Adds a single document to the index of this node.
 *
 * <p>The result is always a JSON string. If the freshly initialised
 * response already carries errors it is returned untouched. If the
 * index hands back no document, error 602 is reported. Otherwise
 * message 681 is added and the document is mirrored under the key
 * "text".
 *
 * @param uid
 *            unique identifier taken from the {@code textID} path
 *            segment
 * @param uri
 *            request URI information (not read by this method)
 * @param json
 *            request body, forwarded unchanged to the index as the
 *            document to add
 * @return the response serialized as a JSON string
 */
@PUT
@Path("/index/{textID}")
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.APPLICATION_JSON)
public String add(@PathParam("textID") Integer uid, @Context UriInfo uri, String json) {
    // Todo: Parameter for server node
    if (DEBUG) {
        log.trace("Added new document with unique identifier {}", uid);
    }

    final Response response = _initResponse();
    if (response.hasErrors()) {
        return response.toJsonString();
    }

    // Get index
    index = Node.getIndex();
    final FieldDocument added = index.addDoc(uid, json);

    if (added != null) {
        // Set HTTP to 200
        response.addMessage(681, "Document was added successfully", added.getID() == null ? "Unknown" : added.getID());
        // Mirror meta data
        response.addJsonNode("text", (ObjectNode) added.toJsonNode());
    }
    else {
        // Set HTTP to ???
        // TODO: This may be a field error!
        response.addError(602, "Unable to add document to index");
    }

    return response.toJsonString();
}
Also used : Response(de.ids_mannheim.korap.response.Response) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) Consumes(javax.ws.rs.Consumes) PUT(javax.ws.rs.PUT)

Example 27 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

The method indexExample of the class TestKrillIndex.

/*
 * Todo: Currently fields can only be set if they are
 * part of the general field set.
 * this will change soon!
 */
/**
 * Walks through a small indexing session: three documents are added
 * in two commits and the document and sentence counters of the
 * "base" field are checked after each commit.
 */
@Test
public void indexExample() throws IOException {
    final KrillIndex index = new KrillIndex();

    // Nothing has been added yet, so every counter is expected to be zero
    assertEquals(0, index.numberOf("base", "documents"));
    assertEquals(0, index.numberOf("base", "tokens"));
    assertEquals(0, index.numberOf("base", "sentences"));
    assertEquals(0, index.numberOf("base", "paragraphs"));

    // First document: term vector given in its string notation
    final FieldDocument peter = new FieldDocument();
    peter.addString("name", "Peter");
    peter.addInt("zahl1", 56);
    peter.addInt("zahl2", "58");
    peter.addInt("zahl3", "059");
    peter.addInt("UID", 1);
    peter.addText("teaser", "Das ist der Name der Rose");
    peter.addTV("base", "ich bau",
            "[(0-3)s:ich|l:ich|p:PPER|-:sentences$<i>2]"
                    + "[(4-7)s:bau|l:bauen|p:VVFIN]");
    index.addDoc(peter);

    // Second document: term vector assembled through a token stream
    final FieldDocument hans = new FieldDocument();
    hans.addString("name", "Hans");
    hans.addInt("zahl1", 14);
    hans.addText("teaser", "Das Sein");
    hans.addInt("UID", 2);
    final MultiTermTokenStream tokens = hans.newMultiTermTokenStream();
    tokens.addMultiTermToken("s:wir#0-3", "l:wir", "p:PPER");
    tokens.addMultiTermToken("s:sind#4-8", "l:sein", "p:VVFIN");
    tokens.addMeta("sentences", 5);
    hans.addTV("base", "wir sind", tokens);
    index.addDoc(hans);

    /* Save documents */
    index.commit();
    assertEquals(2, index.numberOf("base", "documents"));
    // presumably the sum of the two "sentences" meta values above (2 + 5)
    assertEquals(7, index.numberOf("base", "sentences"));

    // Third document, added and committed on its own
    final FieldDocument frank = new FieldDocument();
    frank.addString("name", "Frank");
    frank.addInt("zahl1", 59);
    frank.addInt("zahl2", 65);
    frank.addInt("UID", 3);
    frank.addText("teaser", "Noch ein Versuch");
    frank.addTV("base", "ich bau",
            "[(0-3)s:der|l:der|p:DET|-:sentences$<i>3]"
                    + "[(4-8)s:baum|l:baum|p:NN]");
    index.addDoc(frank);

    /* Save documents */
    index.commit();
    assertEquals(3, index.numberOf("base", "documents"));
    assertEquals(10, index.numberOf("base", "sentences"));

    // KrillQuery kq = new KrillQuery("text");
    // ki.search();

    // The result is not inspected; the call only has to complete
    index.getDoc("1");
}
Also used : FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) MultiTermTokenStream(de.ids_mannheim.korap.index.MultiTermTokenStream) Test(org.junit.Test)

Example 28 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

The method indexRetrieveFieldInfo of the class TestKrillIndex.

/**
 * Checks the field listing returned by {@code KrillIndex.getFields}:
 * first for a hand-built document whose six fields must all be
 * reported with the expected type and value, then for a document
 * loaded from the {@code /wiki/wdd17-982-72848.json.gz} resource.
 */
@Test
public void indexRetrieveFieldInfo() throws IOException {
    KrillIndex ki = new KrillIndex();
    FieldDocument fd = new FieldDocument();
    fd.addString("name", "Peter");
    fd.addString("textSigle", "a/b/c");
    fd.addInt("zahl1", 56);
    fd.addStored("ref", "My reference");
    fd.addKeyword("keyword", "baum");
    fd.addKeyword("keyword", "wald");
    fd.addText("title", "Der Name der Rose");
    ki.addDoc(fd);
    /* Save documents */
    ki.commit();
    JsonNode res = ki.getFields("a/b/c").toJsonNode();

    // TODO: Check if the sorting is always identical!
    // Count every recognised field so that a missing one is detected below.
    int checked = 0;
    // A JsonNode iterates over its child elements, so no raw Iterator
    // and no cast are needed.
    for (JsonNode field : res.at("/document/fields")) {
        String key = field.at("/key").asText();
        switch (key) {
            case "ref":
                assertEquals("type:store", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("My reference", field.at("/value").asText());
                checked++;
                break;
            case "title":
                assertEquals("type:text", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("Der Name der Rose", field.at("/value").asText());
                checked++;
                break;
            case "textSigle":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("a/b/c", field.at("/value").asText());
                checked++;
                break;
            case "keyword":
                assertEquals("type:keywords", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("baum", field.at("/value/0").asText());
                assertEquals("wald", field.at("/value/1").asText());
                checked++;
                break;
            case "zahl1":
                assertEquals("type:number", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals(56, field.at("/value").asInt());
                checked++;
                break;
            case "name":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("Peter", field.at("/value").asText());
                checked++;
                break;
        }
    }
    assertEquals(6, checked);

    // Test with real document
    ki.addDoc(getClass().getResourceAsStream("/wiki/wdd17-982-72848.json.gz"), true);
    /* Save documents */
    ki.commit();

    // A sigle that was never indexed must be reported as an error
    res = ki.getFields("wdd17/982/72841").toJsonNode();
    assertEquals("Document not found", res.at("/errors/0/1").asText());

    res = ki.getFields("WDD17/982/72848").toJsonNode();
    // NOTE(review): the checks below are not counted, so a field that is
    // absent from the listing passes silently. Consider counting them as
    // in the first loop once the expected field set is confirmed.
    for (JsonNode field : res.at("/document/fields")) {
        String key = field.at("/key").asText();
        switch (key) {
            case "pubDate":
                assertEquals("type:date", field.at("/type").asText());
                assertEquals("2017-07-01", field.at("/value").asText());
                break;
            case "textSigle":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("WDD17/982/72848", field.at("/value").asText());
                break;
            case "foundries":
                assertEquals("type:keywords", field.at("/type").asText());
                assertEquals("dereko", field.at("/value/0").asText());
                assertEquals("dereko/structure", field.at("/value/1").asText());
                assertEquals("dereko/structure/base-sentences-paragraphs-pagebreaks", field.at("/value/2").asText());
                break;
        }
    }
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Example 29 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

The method indexAlteration of the class TestKrillIndex.

/**
 * Verifies that added documents only show up in the document count
 * of the "base" field once the index has been committed.
 */
@Test
public void indexAlteration() throws IOException {
    final KrillIndex index = new KrillIndex();
    assertEquals(0, index.numberOf("base", "documents"));

    final FieldDocument peter = new FieldDocument();
    peter.addString("name", "Peter");
    index.addDoc(peter);
    // Still uncommitted: the count is expected to stay at zero
    assertEquals(0, index.numberOf("base", "documents"));

    final FieldDocument michael = new FieldDocument();
    michael.addString("name", "Michael");
    index.addDoc(michael);
    assertEquals(0, index.numberOf("base", "documents"));

    // After the commit both documents are counted
    index.commit();
    assertEquals(2, index.numberOf("base", "documents"));

    // hasDeletions, hasPendingMerges
}
Also used : FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Example 30 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

The method indexFieldInfo of the class TestKrillIndex.

/**
 * Stores two documents with a title and a UID and checks that both
 * can be fetched again by UID. The second UID is set from the string
 * "05678" and is expected to be reachable with and without the
 * leading zero.
 */
@Test
public void indexFieldInfo() throws IOException {
    final KrillIndex index = new KrillIndex();

    // UID given as a number
    final FieldDocument peter = new FieldDocument();
    peter.setTitle("Peter");
    peter.setUID(22);
    index.addDoc(peter);

    // UID given as a zero-padded string
    final FieldDocument akron = new FieldDocument();
    akron.setTitle("Akron");
    akron.setUID("05678");
    index.addDoc(akron);

    index.commit();
    assertEquals(2, index.numberOf("base", "documents"));

    assertEquals("Peter", index.getDoc("22").getTitle());
    assertEquals(22, index.getDoc("22").getUID());

    // Lookup without the leading zero
    assertEquals("Akron", index.getDoc("5678").getTitle());
    assertEquals(5678, index.getDoc("5678").getUID());

    // Lookup with the leading zero
    assertEquals("Akron", index.getDoc("05678").getTitle());
    assertEquals(5678, index.getDoc("05678").getUID());
}
Also used : FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Aggregations

FieldDocument (de.ids_mannheim.korap.index.FieldDocument)40 KrillIndex (de.ids_mannheim.korap.KrillIndex)28 Test (org.junit.Test)28 Result (de.ids_mannheim.korap.response.Result)20 Krill (de.ids_mannheim.korap.Krill)15 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)14 JsonNode (com.fasterxml.jackson.databind.JsonNode)7 Match (de.ids_mannheim.korap.response.Match)7 SpanQuery (org.apache.lucene.search.spans.SpanQuery)7 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)6 KrillCollection (de.ids_mannheim.korap.KrillCollection)4 Test (de.ids_mannheim.korap.Test)4 TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString)2 CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder)2 MultiTermTokenStream (de.ids_mannheim.korap.index.MultiTermTokenStream)1 Response (de.ids_mannheim.korap.response.Response)1 SearchContext (de.ids_mannheim.korap.response.SearchContext)1 Consumes (javax.ws.rs.Consumes)1 PUT (javax.ws.rs.PUT)1 Path (javax.ws.rs.Path)1