Use of org.icij.datashare.text.Document in project datashare by ICIJ.
From the class ElasticsearchIndexerTest, method test_get_unknown_document.
/**
 * Fetching an id that this test never indexed ("unknown") is expected to return
 * {@code null}.
 */
@Test
public void test_get_unknown_document() {
    // Typed local keeps the generic return of indexer.get bound to Document.
    final Document missing = indexer.get(TEST_INDEX, "unknown");

    assertThat(missing).isNull();
}
Use of org.icij.datashare.text.Document in project datashare by ICIJ.
From the class ElasticsearchIndexerTest, method test_untag_document.
/**
 * Indexes one document, tags it, then removes tags in three steps. Each step checks
 * the boolean returned by {@code untag}, and the tags on the re-fetched document are
 * checked after the first and the last removal.
 *
 * @throws IOException if one of the indexer calls fails
 */
@Test
public void test_untag_document() throws IOException {
    Document tagged = new org.icij.datashare.text.Document(
            "id", project("prj"), Paths.get("doc.txt"), "content", Language.FRENCH,
            Charset.defaultCharset(), "application/pdf", new HashMap<>(), INDEXED, new HashSet<>(), 123L);
    indexer.add(TEST_INDEX, tagged);
    final String docId = tagged.getId();

    // "bar" is passed twice; the assertions below expect a single "bar" to remain.
    indexer.tag(project(TEST_INDEX), docId, docId, tag("foo"), tag("bar"), tag("bar"), tag("baz"));

    // Step 1: drop "baz" and "foo" together.
    boolean removedBazAndFoo = indexer.untag(project(TEST_INDEX), docId, docId, tag("baz"), tag("foo"));
    assertThat(removedBazAndFoo).isTrue();
    Document afterFirstUntag = indexer.get(TEST_INDEX, docId);
    assertThat(afterFirstUntag.getTags()).containsOnly(tag("bar"));

    // Step 2: "foo" was already removed, so the test expects false here.
    boolean removedFooAgain = indexer.untag(project(TEST_INDEX), docId, docId, tag("foo"));
    assertThat(removedFooAgain).isFalse();

    // Step 3: drop the last remaining tag.
    boolean removedBar = indexer.untag(project(TEST_INDEX), docId, docId, tag("bar"));
    assertThat(removedBar).isTrue();
    Document afterLastUntag = indexer.get(TEST_INDEX, docId);
    assertThat(afterLastUntag.getTags()).isEmpty();
}
Use of org.icij.datashare.text.Document in project datashare by ICIJ.
From the class ElasticsearchIndexerTest, method test_search_with_scroll.
/**
 * Indexes 12 documents and scrolls through them with a page limit of 5.
 * Expected page sizes are 5, 5, 2 and then 0; the total hit count is read after the
 * first page. After {@code clearScroll()} a new scroll is expected to return 5 again.
 *
 * @throws IOException if one of the indexer or searcher calls fails
 */
@Test
public void test_search_with_scroll() throws IOException {
    for (int docNumber = 0; docNumber < 12; docNumber++) {
        String docId = "id" + docNumber;
        String fileName = format("doc%d.txt", docNumber);
        String content = format("content %d", docNumber);
        Document doc = new org.icij.datashare.text.Document(
                docId, project("prj"), Paths.get(fileName), content, Language.ENGLISH,
                Charset.defaultCharset(), "text/plain", new HashMap<>(), DONE, new HashSet<>(), 345L);
        indexer.add(TEST_INDEX, doc);
    }

    Indexer.Searcher searcher = indexer.search(TEST_INDEX, Document.class).limit(5);

    // First page, then the total, which is read once the scroll has started.
    assertThat(searcher.scroll().count()).isEqualTo(5);
    assertThat(searcher.totalHits()).isEqualTo(12);

    // Remaining pages: a full one, the 2 leftovers, then nothing.
    for (int expectedPageSize : new int[] {5, 2, 0}) {
        assertThat(searcher.scroll().count()).isEqualTo(expectedPageSize);
    }

    // After clearing, the next scroll is expected to return a first page of 5 again.
    searcher.clearScroll();
    assertThat(searcher.scroll().count()).isEqualTo(5);
    searcher.clearScroll();
}
Use of org.icij.datashare.text.Document in project datashare by ICIJ.
From the class ElasticsearchIndexerTest, method test_query_like_js_front_finds_document_from_its_child_named_entity.
/**
 * Indexes a document together with one PERSON named entity attached to it, then
 * searches for "john" while excluding the "content" source field. Exactly one
 * document is expected, with id "id" and empty content.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void test_query_like_js_front_finds_document_from_its_child_named_entity() throws Exception {
    Document parent = new org.icij.datashare.text.Document(
            "id", project("prj"), Paths.get("doc.txt"), "content with john doe", Language.FRENCH,
            Charset.defaultCharset(), "application/pdf", new HashMap<>(), INDEXED, new HashSet<>(), 34L);
    indexer.add(TEST_INDEX, parent);

    NamedEntity johnDoe = create(PERSON, "John Doe", asList(12L), parent.getId(), "root", CORENLP, Language.FRENCH);
    indexer.bulkAdd(TEST_INDEX, CORENLP, singletonList(johnDoe), parent);

    Object[] hits = indexer.search(TEST_INDEX, Document.class)
            .withoutSource("content")
            .with("john")
            .execute()
            .toArray();

    assertThat(hits.length).isEqualTo(1);
    // Cast only after the size check so that an empty result fails on the assertion above.
    Document hit = (Document) hits[0];
    assertThat(hit.getId()).isEqualTo("id");
    assertThat(hit.getContent()).isEmpty();
}
Use of org.icij.datashare.text.Document in project datashare by ICIJ.
From the class ElasticsearchIndexerTest, method test_search_with_and_without_NLP_tags.
/**
 * Indexes one DONE document whose NLP tag set holds CORENLP and OPENNLP, then checks
 * how many documents the {@code with(...)} and {@code without(...)} pipeline filters
 * return:
 * <ul>
 *   <li>{@code without} a pipeline the document has (alone or both): 0 hits;</li>
 *   <li>{@code without} a pipeline the document lacks (IXAPIPE): 1 hit;</li>
 *   <li>{@code with} CORENLP, with both pipelines, or with CORENLP plus IXAPIPE: 1 hit.</li>
 * </ul>
 *
 * @throws IOException if one of the indexer calls fails
 */
@Test
public void test_search_with_and_without_NLP_tags() throws IOException {
    // A plain HashSet replaces the former double-brace initializer, which created an
    // anonymous HashSet subclass holding a reference to the enclosing test instance.
    HashSet<Pipeline.Type> nlpTags = new HashSet<>();
    nlpTags.add(CORENLP);
    nlpTags.add(OPENNLP);

    Document doc = new org.icij.datashare.text.Document(
            "id", project("prj"), Paths.get("doc.txt"), "content", Language.FRENCH,
            Charset.defaultCharset(), "application/pdf", new HashMap<>(), DONE, nlpTags, 123L);
    indexer.add(TEST_INDEX, doc);

    // Excluding a pipeline that already processed the document filters it out.
    assertThat((int) indexer.search(TEST_INDEX, Document.class).ofStatus(DONE).without(CORENLP).execute().count()).isEqualTo(0);
    assertThat((int) indexer.search(TEST_INDEX, Document.class).ofStatus(DONE).without(CORENLP, OPENNLP).execute().count()).isEqualTo(0);
    // Excluding a pipeline the document never went through keeps it.
    assertThat((int) indexer.search(TEST_INDEX, Document.class).ofStatus(DONE).without(IXAPIPE).execute().count()).isEqualTo(1);

    // Requiring pipelines: the document is expected in all three cases, including the
    // one where IXAPIPE (absent from the document) is listed next to CORENLP.
    assertThat((int) indexer.search(TEST_INDEX, Document.class).ofStatus(DONE).with(CORENLP).execute().count()).isEqualTo(1);
    assertThat((int) indexer.search(TEST_INDEX, Document.class).ofStatus(DONE).with(CORENLP, OPENNLP).execute().count()).isEqualTo(1);
    assertThat((int) indexer.search(TEST_INDEX, Document.class).ofStatus(DONE).with(CORENLP, IXAPIPE).execute().count()).isEqualTo(1);
}
Aggregations