use of org.icij.datashare.text.Document in project datashare by ICIJ.
the class BatchSearchRunnerIntTest method test_search_phrase_matches_with_slop.
/**
 * Indexes a document whose content is "mydoc find", runs a batch search for the
 * reversed phrase "find mydoc", and verifies that the result consumer receives
 * exactly that document for the query.
 *
 * With phrase matching, swapping two terms (Elasticsearch calls it a transposition)
 * costs a slop of 2, see
 * https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query-phrase.html
 */
@Test
public void test_search_phrase_matches_with_slop() throws Exception {
    final String reversedPhrase = "find mydoc";

    Document indexedDoc = createDoc("docId").with("mydoc find").build();
    indexer.add(TEST_INDEX, indexedDoc);

    // NOTE(review): the trailing "2, true" arguments are presumably the slop value and the
    // phrase-match flag -- confirm against the BatchSearch constructor signature.
    BatchSearch batchSearch = new BatchSearch(project(TEST_INDEX), "name", "desc", asSet(reversedPhrase), User.local(), false, null, null, 2, true);
    BatchSearchRunner runner = new BatchSearchRunner(indexer, new PropertiesProvider(), batchSearch, resultConsumer);
    runner.call();

    verify(resultConsumer).apply(batchSearch.uuid, reversedPhrase, singletonList(indexedDoc));
}
use of org.icij.datashare.text.Document in project datashare by ICIJ.
the class BatchSearchRunnerIntTest method test_search_with_file_types_ko.
/**
 * Indexes a document, runs a batch search for "mydoc" with a
 * singletonList("application/pdf") argument (the file-types filter, going by the
 * test name), and verifies that the result consumer is never invoked for that
 * search and query.
 */
@Test
public void test_search_with_file_types_ko() throws Exception {
    final String query = "mydoc";

    Document indexedDoc = createDoc("mydoc").build();
    indexer.add(TEST_INDEX, indexedDoc);

    // NOTE(review): presumably the document built above does not have the
    // "application/pdf" content type, hence no hit -- confirm against createDoc defaults.
    BatchSearch filteredSearch = new BatchSearch(project(TEST_INDEX), "name", "desc", asSet(query), User.local(), false, singletonList("application/pdf"), null, 0);
    BatchSearchRunner runner = new BatchSearchRunner(indexer, new PropertiesProvider(), filteredSearch, resultConsumer);
    runner.call();

    verify(resultConsumer, never()).apply(eq(filteredSearch.uuid), eq(query), anyList());
}
use of org.icij.datashare.text.Document in project datashare by ICIJ.
the class NerResourceTest method test_post_text_returns_NamedEntity_list.
/**
 * Posts a document's text to "/api/ner/findNames/CORENLP" and checks the JSON
 * response: it must deserialize to a one-element list whose single element is a
 * map carrying the expected mention, extractor, normalized mention and offsets.
 *
 * The pipeline is stubbed so that processing the document yields one PERSON
 * entity "foù" at offset 10.
 */
@Test
public void test_post_text_returns_NamedEntity_list() throws Exception {
    Document doc = DocumentBuilder.createDoc("inline").with("This the 'foù' file content.").with(ENGLISH).build();

    // Stub the pipeline: any document equal to doc produces a single PERSON named entity.
    doReturn(asList(NamedEntity.create(NamedEntity.Category.PERSON, "foù", asList(10L), doc.getId(), "root", CORENLP, ENGLISH))).when(pipeline).process(eq(doc));

    Response response = post("/api/ner/findNames/CORENLP", doc.getContent()).response();

    // Wildcard types instead of raw List/Map: the JSON payload is only inspected, never mutated.
    List<?> actualNerList = TypeConvert.fromJson(response.content(), List.class);
    assertThat(actualNerList).hasSize(1);
    assertThat(actualNerList.get(0)).isInstanceOf(HashMap.class);
    // The offset is asserted as an Integer (10) although the stub was given a Long (10L).
    assertThat((Map<?, ?>) actualNerList.get(0)).includes(entry("mention", "foù"), entry("extractor", "CORENLP"), entry("mentionNorm", "fou"), entry("offsets", asList(10)));
}
Aggregations