use of org.icij.datashare.text.Document in project datashare by ICIJ.
the class BatchSearchRunnerTest method test_run_batch_search.
/**
 * Runs a two-query batch search through {@code BatchSearchRunner} with a stubbed search,
 * then checks the value returned by the runner and the hand-off to the result consumer.
 */
@Test
public void test_run_batch_search() throws Exception {
    // Stub the search with two documents; the meaning of the leading 1 is defined by
    // mockSearch.willReturn, which is not visible from this method.
    Document[] foundDocs = { createDoc("doc1").build(), createDoc("doc2").build() };
    mockSearch.willReturn(1, foundDocs);

    BatchSearch batchSearch = new BatchSearch(
            "uuid1", project("test-datashare"), "name1", "desc1",
            asSet("query1", "query2"), new Date(), BatchSearch.State.QUEUED, User.local());
    BatchSearchRunner runner =
            new BatchSearchRunner(indexer, new PropertiesProvider(), batchSearch, resultConsumer);

    // The runner is expected to return 2, and query1 must be reported with both documents.
    assertThat(runner.call()).isEqualTo(2);
    verify(resultConsumer).apply("uuid1", "query1", asList(foundDocs));
}
use of org.icij.datashare.text.Document in project datashare by ICIJ.
the class BatchSearchRunnerIntTest method test_search_with_paths_ko.
/**
 * Indexes a single document, runs a batch search built with the single-element list
 * {@code "/foo/bar"} (presumably the paths filter, going by the test name), and checks
 * that the result consumer is never called for that search.
 */
@Test
public void test_search_with_paths_ko() throws Exception {
    Document indexedDoc = createDoc("mydoc").build();
    indexer.add(TEST_INDEX, indexedDoc);

    BatchSearch unmatchedSearch = new BatchSearch(
            project(TEST_INDEX), "name", "desc", asSet("mydoc"), User.local(),
            false, null, singletonList("/foo/bar"), 0);
    BatchSearchRunner runner =
            new BatchSearchRunner(indexer, new PropertiesProvider(), unmatchedSearch, resultConsumer);
    runner.call();

    // No result may be reported for the "mydoc" query of this search.
    verify(resultConsumer, never()).apply(eq(unmatchedSearch.uuid), eq("mydoc"), anyList());
}
use of org.icij.datashare.text.Document in project datashare by ICIJ.
the class BatchSearchRunnerIntTest method test_search_with_phraseMatches_with_ner.
/**
 * Indexes a document plus a PERSON named entity attached to it, then runs two batch
 * searches that differ only in their query: "anne doc" must report nothing, while
 * "anne's doc" must report the indexed document.
 */
@Test
public void test_search_with_phraseMatches_with_ner() throws Exception {
    Document indexedDoc = createDoc("docId").with("anne's doc to find").build();
    indexer.add(TEST_INDEX, indexedDoc);
    // The named entity is indexed in the same index and points back at the document.
    indexer.add(TEST_INDEX, NamedEntity.create(
            NamedEntity.Category.PERSON, "anne", asList(12L),
            indexedDoc.getId(), indexedDoc.getRootDocument(),
            Pipeline.Type.CORENLP, Language.FRENCH));

    BatchSearch missingSearch = new BatchSearch(
            project(TEST_INDEX), "name", "desc", asSet("anne doc"), User.local(),
            false, null, null, true);
    BatchSearch matchingSearch = new BatchSearch(
            project(TEST_INDEX), "name", "desc", asSet("anne's doc"), User.local(),
            false, null, null, true);

    BatchSearchRunner missingRunner =
            new BatchSearchRunner(indexer, new PropertiesProvider(), missingSearch, resultConsumer);
    missingRunner.call();
    BatchSearchRunner matchingRunner =
            new BatchSearchRunner(indexer, new PropertiesProvider(), matchingSearch, resultConsumer);
    matchingRunner.call();

    verify(resultConsumer, never()).apply(eq(missingSearch.uuid), eq("anne doc"), anyList());
    verify(resultConsumer).apply(matchingSearch.uuid, "anne's doc", singletonList(indexedDoc));
}
use of org.icij.datashare.text.Document in project datashare by ICIJ.
the class NlpConsumerTest method test_on_message_process__chunked_doc_when_doc_is_large.
/**
 * Feeds the NLP listener a document whose content is 33 characters long and checks
 * that the pipeline is initialized for ENGLISH and that {@code process} is invoked
 * twice on the document, with arguments (32, 0) and (32, 32).
 */
@Test
public void test_on_message_process__chunked_doc_when_doc_is_large() throws Exception {
    String projectName = "projectName";
    String routing = "routing";
    // 32 characters followed by a trailing '+', i.e. 33 characters in total.
    Document hugeDoc = createDoc("huge_doc").with("0123456789abcdef0123456789abcdef+").build();

    when(pipeline.initialize(any())).thenReturn(true);
    when(pipeline.process(hugeDoc)).thenReturn(emptyList());
    when(indexer.get(projectName, hugeDoc.getId(), routing)).thenReturn(hugeDoc);

    nlpListener.findNamedEntities(projectName, hugeDoc.getId(), routing);

    verify(pipeline).initialize(ENGLISH);
    // NOTE(review): the two numeric arguments look like (chunk size, offset) — confirm
    // against the Pipeline interface.
    verify(pipeline).process(hugeDoc, 32, 0);
    verify(pipeline).process(hugeDoc, 32, 32);
}
use of org.icij.datashare.text.Document in project datashare by ICIJ.
the class DatashareExtractIntegrationTest method test_spew_and_read_index.
/**
 * Extracts the {@code /docs/doc.txt} test resource, writes it with the spewer, reads
 * it back from the index, and checks every field of the resulting {@code Document}.
 *
 * @throws Exception if the resource URL cannot be converted to a URI, or if
 *                   extraction, writing or reading fails
 */
@Test
public void test_spew_and_read_index() throws Exception {
    // Resolve the resource through its URI rather than URL.getPath(): getPath() keeps
    // percent-escapes (a space in the checkout directory becomes "%20") and yields a
    // leading slash before the drive letter on Windows, so the file would not be found.
    Path path = get(getClass().getResource("/docs/doc.txt").toURI());
    TikaDocument tikaDocument = createExtractor().extract(path);

    spewer.write(tikaDocument);
    Document doc = indexer.get(TEST_INDEX, tikaDocument.getId());

    // Identity and content
    assertThat(doc.getId()).isEqualTo(tikaDocument.getId());
    assertThat(doc.getContent()).isEqualTo("This is a document to be parsed by datashare.");
    assertThat(doc.getLanguage()).isEqualTo(ENGLISH);
    assertThat(doc.getContentLength()).isEqualTo(45);
    // Location on disk
    assertThat(doc.getDirname()).contains(get("docs"));
    assertThat(doc.getPath()).contains(get("doc.txt"));
    // Extraction metadata
    assertThat(doc.getContentEncoding()).isEqualTo(Charset.forName("iso-8859-1"));
    assertThat(doc.getContentType()).isEqualTo("text/plain");
    assertThat(doc.getExtractionLevel()).isEqualTo((short) 0);
    assertThat(doc.getMetadata()).hasSize(6);
    // The document has no parent and is its own root
    assertThat(doc.getParentDocument()).isNull();
    assertThat(doc.getRootDocument()).isEqualTo(doc.getId());
    assertThat(doc.getCreationDate()).isNull();
}
Aggregations