Search in sources:

Example 16 with Document

use of org.icij.datashare.text.Document in project datashare by ICIJ.

the class ElasticsearchIndexerTest method test_delete_by_query.

/**
 * Indexes one document and two PERSON named entities that reference "docId",
 * calls {@code deleteAll} on the test index and expects it to return true,
 * then checks that a Document search on that index yields no results.
 */
@Test
public void test_delete_by_query() throws Exception {
    // Arrange: one document followed by two named entities pointing at it.
    Document indexedDoc = new org.icij.datashare.text.Document(
            "docId", project("prj"), Paths.get("doc.txt"), "content",
            Language.FRENCH, Charset.defaultCharset(), "application/pdf",
            new HashMap<>(), INDEXED, new HashSet<>(), 4324L);
    indexer.add(TEST_INDEX, indexedDoc);

    NamedEntity joeFoo = create(PERSON, "Joe Foo", asList(2L), "docId", "root", CORENLP, Language.FRENCH);
    indexer.add(TEST_INDEX, joeFoo);

    NamedEntity johnDoe = create(PERSON, "John Doe", asList(12L), "docId", "root", CORENLP, Language.FRENCH);
    indexer.add(TEST_INDEX, johnDoe);

    // Act. NOTE(review): despite the test name, the call exercised here is deleteAll.
    boolean deleted = indexer.deleteAll(TEST_INDEX);
    assertThat(deleted).isTrue();

    // Assert: searching for documents now returns an empty result.
    assertThat(indexer.search(TEST_INDEX, Document.class).execute().toArray().length).isEqualTo(0);
}
Also used : Document(org.icij.datashare.text.Document) Test(org.junit.Test)

Example 17 with Document

use of org.icij.datashare.text.Document in project datashare by ICIJ.

the class ElasticsearchIndexerTest method test_search_with_status.

/**
 * Indexes a single document created with status INDEXED, then checks that a
 * search filtered with {@code ofStatus(INDEXED)} returns exactly one entity
 * while the same search filtered with {@code ofStatus(DONE)} returns none.
 */
@Test
public void test_search_with_status() throws IOException {
    Document indexedDoc = new org.icij.datashare.text.Document(
            "id", project("prj"), Paths.get("doc.txt"), "content",
            Language.FRENCH, Charset.defaultCharset(), "application/pdf",
            new HashMap<>(), INDEXED, new HashSet<>(), 123L);
    indexer.add(TEST_INDEX, indexedDoc);

    // Filtering on the status the document was created with must find it.
    List<? extends Entity> withIndexedStatus = indexer.search(TEST_INDEX, Document.class)
            .ofStatus(INDEXED)
            .execute()
            .collect(toList());
    assertThat(withIndexedStatus.size()).isEqualTo(1);

    // Filtering on a different status must find nothing.
    int withDoneStatus = (int) indexer.search(TEST_INDEX, Document.class)
            .ofStatus(DONE)
            .execute()
            .count();
    assertThat(withDoneStatus).isEqualTo(0);
}
Also used : Document(org.icij.datashare.text.Document) Test(org.junit.Test)

Example 18 with Document

use of org.icij.datashare.text.Document in project datashare by ICIJ.

the class ElasticsearchIndexerTest method test_group_tag_untag_documents.

/**
 * Indexes two documents, tags both in one {@code tag} call with "foo" and
 * "bar" and checks each document carries exactly those tags, then removes
 * both tags in one {@code untag} call and checks each document has none left.
 */
@Test
public void test_group_tag_untag_documents() throws IOException {
    Document firstDoc = new org.icij.datashare.text.Document(
            "id1", project("prj"), Paths.get("doc1.txt"), "content1",
            Language.FRENCH, Charset.defaultCharset(), "application/pdf",
            new HashMap<>(), INDEXED, new HashSet<>(), 123L);
    Document secondDoc = new org.icij.datashare.text.Document(
            "id2", project("prj"), Paths.get("doc2.txt"), "content2",
            Language.FRENCH, Charset.defaultCharset(), "application/pdf",
            new HashMap<>(), INDEXED, new HashSet<>(), 123L);
    indexer.add(TEST_INDEX, firstDoc);
    indexer.add(TEST_INDEX, secondDoc);

    List<String> docIds = asList("id1", "id2");

    // Group tagging: both documents end up with exactly the two tags.
    assertThat(indexer.tag(project(TEST_INDEX), docIds, tag("foo"), tag("bar"))).isTrue();
    for (String docId : docIds) {
        assertThat(((Document) indexer.get(TEST_INDEX, docId)).getTags()).containsOnly(tag("foo"), tag("bar"));
    }

    // Group untagging: both documents end up with no tags.
    assertThat(indexer.untag(project(TEST_INDEX), docIds, tag("foo"), tag("bar"))).isTrue();
    for (String docId : docIds) {
        assertThat(((Document) indexer.get(TEST_INDEX, docId)).getTags()).isEmpty();
    }
}
Also used : Document(org.icij.datashare.text.Document) Test(org.junit.Test)

Example 19 with Document

use of org.icij.datashare.text.Document in project datashare by ICIJ.

the class ElasticsearchIndexerTest method test_update_named_entity.

/**
 * Indexes a document and a PERSON named entity created with that document's
 * id, calls {@code hide()} on the entity, pushes it through
 * {@code indexer.update}, and checks that the entity read back from the
 * index reports {@code isHidden()} as true.
 */
@Test
public void test_update_named_entity() throws IOException {
    Document parentDoc = new org.icij.datashare.text.Document(
            "id", project("prj"), Paths.get("doc.txt"), "content Madeline",
            Language.FRENCH, Charset.defaultCharset(), "text/plain",
            new HashMap<>(), DONE, new HashSet<>(), 123L);
    NamedEntity entity = create(PERSON, "Madeline", asList(8L), parentDoc.getId(), "root", CORENLP, Language.ENGLISH);

    indexer.add(TEST_INDEX, parentDoc);
    indexer.add(TEST_INDEX, entity);

    // Change the in-memory entity, then persist the change.
    entity.hide();
    indexer.update(TEST_INDEX, entity);

    // Reload using the entity id and the parent document id.
    NamedEntity reloaded = indexer.get(TEST_INDEX, entity.getId(), parentDoc.getId());
    assertThat(reloaded.isHidden()).isTrue();
}
Also used : NamedEntity(org.icij.datashare.text.NamedEntity) Document(org.icij.datashare.text.Document) Test(org.junit.Test)

Example 20 with Document

use of org.icij.datashare.text.Document in project datashare by ICIJ.

the class ElasticsearchIndexerTest method test_search_with_json_query.

/**
 * Indexes a single document, then runs a search whose query is supplied as a
 * JSON tree parsed with {@code JsonObjectMapper.MAPPER} and checks that
 * exactly one entity is returned.
 */
@Test
public void test_search_with_json_query() throws IOException {
    Document indexedDoc = new org.icij.datashare.text.Document(
            "id", project("prj"), Paths.get("doc.txt"), "content",
            Language.FRENCH, Charset.defaultCharset(), "application/pdf",
            new HashMap<>(), INDEXED, new HashSet<>(), 123L);
    indexer.add(TEST_INDEX, indexedDoc);

    // bool/must query: match_all, a wildcard query_string, and a match on type "Document".
    String jsonQuery = "{\"bool\":{\"must\":[{\"match_all\":{}},{\"bool\":{\"should\":[{\"query_string\":{\"query\":\"*\"}}]}},{\"match\":{\"type\":\"Document\"}}]}}";

    List<? extends Entity> hits = indexer.search(TEST_INDEX, Document.class)
            .set(JsonObjectMapper.MAPPER.readTree(jsonQuery))
            .execute()
            .collect(toList());
    assertThat(hits.size()).isEqualTo(1);
}
Also used : Document(org.icij.datashare.text.Document) Test(org.junit.Test)

Aggregations

Document (org.icij.datashare.text.Document): 63; Test (org.junit.Test): 48; PropertiesProvider (org.icij.datashare.PropertiesProvider): 19; BatchSearch (org.icij.datashare.batch.BatchSearch): 15; NamedEntity (org.icij.datashare.text.NamedEntity): 11; TikaDocument (org.icij.extract.document.TikaDocument): 10; HashMap (java.util.HashMap): 9; Path (java.nio.file.Path): 6; Date (java.util.Date): 5; Indexer (org.icij.datashare.text.indexing.Indexer): 5; File (java.io.File): 4; IOException (java.io.IOException): 4; InputStream (java.io.InputStream): 4; IntStream (java.util.stream.IntStream): 4; DocumentBuilder.createDoc (org.icij.datashare.text.DocumentBuilder.createDoc): 4; Project.project (org.icij.datashare.text.Project.project): 4; User (org.icij.datashare.user.User): 4; Rule (org.junit.Rule): 4; Arrays.asList (java.util.Arrays.asList): 3; List (java.util.List): 3