Use of org.icij.datashare.text.Document in the datashare project by ICIJ.
From the class ElasticsearchIndexerTest, method test_tag_document_without_tags_field_for_backward_compatibility.
/**
 * Checks that tagging succeeds on a document whose stored source no longer
 * has a {@code tags} field.
 *
 * <p>The document is indexed first, then its {@code tags} entry is removed
 * with an inline Painless update script before the tag call is made.
 */
@Test
public void test_tag_document_without_tags_field_for_backward_compatibility() throws IOException {
    Document doc = new org.icij.datashare.text.Document(
            "id", project("prj"), Paths.get("doc.txt"), "content",
            Language.FRENCH, Charset.defaultCharset(), "application/pdf",
            new HashMap<>(), INDEXED, new HashSet<>(), 123L);
    indexer.add(TEST_INDEX, doc);
    String docId = doc.getId();

    // Strip the "tags" field from the indexed source; the update is sent
    // with the IMMEDIATE refresh policy before the tag call below.
    Script dropTagsScript = new Script(ScriptType.INLINE, "painless", "ctx._source.remove(\"tags\")", new HashMap<>());
    UpdateRequest removeTagsRequest = new UpdateRequest(TEST_INDEX, docId).script(dropTagsScript);
    removeTagsRequest.setRefreshPolicy(IMMEDIATE);
    es.client.update(removeTagsRequest, RequestOptions.DEFAULT);

    // The document id is passed twice, as in the original call.
    boolean tagged = indexer.tag(project(TEST_INDEX), docId, docId, tag("tag"));
    assertThat(tagged).isTrue();
}
Use of org.icij.datashare.text.Document in the datashare project by ICIJ.
From the class ElasticsearchIndexerTest, method test_search_source_filtering.
/**
 * Checks a search whose source is restricted to the {@code contentType} field:
 * the returned document must carry the indexed content type and id, and its
 * content must be empty.
 */
@Test
public void test_search_source_filtering() throws IOException {
    Document doc = new org.icij.datashare.text.Document(
            "id", project("prj"), Paths.get("doc_with_parent.txt"), "content",
            Language.FRENCH, Charset.defaultCharset(), "application/pdf",
            new HashMap<>(), INDEXED, new HashSet<>(), 444L);
    indexer.add(TEST_INDEX, doc);

    // Only the first hit is inspected.
    Object firstHit = indexer.search(TEST_INDEX, Document.class)
            .withSource("contentType")
            .execute()
            .collect(toList())
            .get(0);
    Document actualDoc = (Document) firstHit;

    assertThat(actualDoc.getContentType()).isEqualTo("application/pdf");
    assertThat(actualDoc.getId()).isEqualTo(doc.getId());
    // "content" was not among the requested source fields.
    assertThat(actualDoc.getContent()).isEmpty();
}
Use of org.icij.datashare.text.Document in the datashare project by ICIJ.
From the class ElasticsearchIndexerTest, method test_search_size_limit.
/**
 * Indexes twenty documents, then checks that a search with {@code limit(5)}
 * yields five results while a search without an explicit limit yields all
 * twenty.
 */
@Test
public void test_search_size_limit() throws IOException {
    for (int docNumber = 0; docNumber < 20; docNumber++) {
        String docId = "id" + docNumber;
        String fileName = format("doc%d.txt", docNumber);
        String content = format("content %d", docNumber);
        Document doc = new org.icij.datashare.text.Document(
                docId, project("prj"), Paths.get(fileName), content,
                Language.ENGLISH, Charset.defaultCharset(), "text/plain",
                new HashMap<>(), DONE, new HashSet<>(), 666L);
        indexer.add(TEST_INDEX, doc);
    }

    long limitedHits = indexer.search(TEST_INDEX, Document.class).limit(5).execute().count();
    assertThat(limitedHits).isEqualTo(5);

    long unlimitedHits = indexer.search(TEST_INDEX, Document.class).execute().count();
    assertThat(unlimitedHits).isEqualTo(20);
}
Use of org.icij.datashare.text.Document in the datashare project by ICIJ.
From the class ElasticsearchIndexerTest, method test_bulk_update.
/**
 * Bulk-adds two named entities for an indexed document, hides both, pushes
 * them through {@code bulkUpdate}, and checks that the two entities returned
 * by a search are hidden.
 */
@Test
public void test_bulk_update() throws IOException {
    Document doc = new org.icij.datashare.text.Document(
            "id", project("prj"), Paths.get("doc.txt"), "content",
            Language.FRENCH, Charset.defaultCharset(), "application/pdf",
            new HashMap<>(), INDEXED, new HashSet<>(), 34L);
    indexer.add(TEST_INDEX, doc);

    String docId = doc.getId();
    NamedEntity person = create(PERSON, "John Doe", asList(12L), docId, "root", CORENLP, Language.FRENCH);
    NamedEntity organization = create(ORGANIZATION, "AAA", asList(123L), docId, "root", CORENLP, Language.FRENCH);
    indexer.bulkAdd(TEST_INDEX, CORENLP, asList(person, organization), doc);

    // Hide the in-memory entities, then send them through bulkUpdate.
    person.hide();
    organization.hide();
    boolean updated = indexer.bulkUpdate(TEST_INDEX, asList(person, organization));
    assertThat(updated).isTrue();

    Object[] namedEntities = indexer.search(TEST_INDEX, NamedEntity.class).execute().toArray();
    assertThat(namedEntities.length).isEqualTo(2);
    // Exactly two entries remain at this point, so the loop checks both.
    for (Object found : namedEntities) {
        assertThat(((NamedEntity) found).isHidden()).isTrue();
    }
}
Use of org.icij.datashare.text.Document in the datashare project by ICIJ.
From the class ElasticsearchIndexerTest, method test_tag_document.
/**
 * Tags an indexed document with {@code foo} and {@code bar}, checks that
 * tagging it again with {@code foo} returns {@code false}, and checks that a
 * search on both tags returns that single document carrying exactly those
 * two tags.
 */
@Test
public void test_tag_document() throws IOException {
    Document doc = new org.icij.datashare.text.Document(
            "id", project("prj"), Paths.get("doc.txt"), "content",
            Language.FRENCH, Charset.defaultCharset(), "application/pdf",
            new HashMap<>(), INDEXED, new HashSet<>(), 123L);
    indexer.add(TEST_INDEX, doc);
    String docId = doc.getId();

    boolean firstTagging = indexer.tag(project(TEST_INDEX), docId, docId, tag("foo"), tag("bar"));
    assertThat(firstTagging).isTrue();

    // A second call with a tag the document already has is expected to return false.
    boolean repeatedTagging = indexer.tag(project(TEST_INDEX), docId, docId, tag("foo"));
    assertThat(repeatedTagging).isFalse();

    List<? extends Entity> taggedDocs = indexer.search(TEST_INDEX, Document.class)
            .with(tag("foo"), tag("bar"))
            .execute()
            .collect(toList());
    assertThat(taggedDocs.size()).isEqualTo(1);

    Document taggedDoc = (Document) taggedDocs.get(0);
    assertThat(taggedDoc.getTags()).containsOnly(tag("foo"), tag("bar"));
}
Aggregations