Use of `org.graylog.shaded.elasticsearch7.org.elasticsearch.action.get.GetRequest` in project datashare by ICIJ: class `ElasticsearchSpewerTest`, method `test_duplicate_file`.
@Test
public void test_duplicate_file() throws Exception {
// Indexing the same content twice must store the first hit as a "Document"
// and the second as a "Duplicate" pointing at the original document's id.
// NOTE(review): replaced double-brace map initialization — it creates an
// anonymous HashMap subclass that pins the enclosing test instance in memory.
HashMap<String, String> factoryOptions = new HashMap<>();
factoryOptions.put("idDigestMethod", Document.HASHER.toString());
DocumentFactory tikaFactory = new DocumentFactory().configure(Options.from(factoryOptions));
Extractor extractor = new Extractor(tikaFactory);
extractor.setDigester(new UpdatableDigester("project", Document.HASHER.toString()));
// doc-duplicate.txt has byte-identical content to doc.txt, so both digest to the same id.
final TikaDocument document = extractor.extract(get(Objects.requireNonNull(getClass().getResource("/docs/doc.txt")).getPath()));
final TikaDocument document2 = extractor.extract(get(Objects.requireNonNull(getClass().getResource("/docs/doc-duplicate.txt")).getPath()));
spewer.write(document);
spewer.write(document2);
// The duplicate is stored under the id derived from its path plus the original document id.
GetResponse actualDocument = es.client.get(new GetRequest(TEST_INDEX, document.getId()), RequestOptions.DEFAULT);
GetResponse actualDocument2 = es.client.get(new GetRequest(TEST_INDEX, new Duplicate(document2.getPath(), document.getId()).getId()), RequestOptions.DEFAULT);
assertThat(actualDocument.isExists()).isTrue();
assertThat(actualDocument.getSourceAsMap()).includes(entry("type", "Document"));
assertThat(actualDocument2.isExists()).isTrue();
assertThat(actualDocument2.getSourceAsMap()).includes(entry("type", "Duplicate"));
}
Use of `org.graylog.shaded.elasticsearch7.org.elasticsearch.action.get.GetRequest` in project datashare by ICIJ: class `ElasticsearchSpewerTest`, method `test_truncated_content`.
@Test
public void test_truncated_content() throws Exception {
// With maxContentLength=20, indexed content must be cut at the last whole
// word that fits ("this content should" — 19 chars).
// NOTE(review): replaced double-brace map initialization — the anonymous
// HashMap subclass it creates holds a reference to the enclosing test instance.
HashMap<String, String> properties = new HashMap<>();
properties.put("maxContentLength", "20");
ElasticsearchSpewer limitedContentSpewer = new ElasticsearchSpewer(es.client, text -> Language.ENGLISH, new FieldNames(), publisher, new PropertiesProvider(properties)).withRefresh(IMMEDIATE).withIndex("test-datashare");
final TikaDocument document = new DocumentFactory().withIdentifier(new PathIdentifier()).create(get("fake-file.txt"));
final ParsingReader reader = new ParsingReader(new ByteArrayInputStream("this content should be truncated".getBytes()));
document.setReader(reader);
limitedContentSpewer.write(document);
GetResponse documentFields = es.client.get(new GetRequest(TEST_INDEX, document.getId()), RequestOptions.DEFAULT);
assertThat(documentFields.getSourceAsMap()).includes(entry("content", "this content should"));
}
Use of `org.graylog.shaded.elasticsearch7.org.elasticsearch.action.get.GetRequest` in project datashare by ICIJ: class `ElasticsearchSpewerTest`, method `test_metadata`.
@Test
public void test_metadata() throws Exception {
// Index a plain-text document and verify extraction metadata lands in the ES source.
Path docPath = get(Objects.requireNonNull(getClass().getResource("/docs/a/b/c/doc.txt")).getPath());
TikaDocument extracted = new Extractor().extract(docPath);
spewer.write(extracted);
GetResponse indexed = es.client.get(new GetRequest(TEST_INDEX, extracted.getId()), RequestOptions.DEFAULT);
assertThat(indexed.getSourceAsMap()).includes(
        entry("contentEncoding", "ISO-8859-1"),
        entry("contentType", "text/plain"),
        entry("nerTags", new ArrayList<>()),
        entry("contentLength", 45),
        entry("status", "INDEXED"),
        entry("path", docPath.toString()),
        entry("dirname", docPath.getParent().toString()));
}
Use of `org.graylog.shaded.elasticsearch7.org.elasticsearch.action.get.GetRequest` in project datashare by ICIJ: class `ElasticsearchSpewerTest`, method `test_long_content_length`.
@Test
public void test_long_content_length() throws Exception {
// A Content-Length header larger than Integer.MAX_VALUE must be indexed as a long.
final TikaDocument doc = new DocumentFactory().withIdentifier(new PathIdentifier()).create(get("t-file.txt"));
doc.setReader(new ParsingReader(new ByteArrayInputStream("test".getBytes())));
doc.getMetadata().set("Content-Length", "7862117376");
spewer.write(doc);
GetResponse fields = es.client.get(new GetRequest(TEST_INDEX, doc.getId()), RequestOptions.DEFAULT);
assertThat(fields.getSourceAsMap()).includes(entry("contentLength", 7862117376L));
}
Use of `org.graylog.shaded.elasticsearch7.org.elasticsearch.action.get.GetRequest` in project incubator-gobblin by apache: class `ElasticsearchWriterIntegrationTest`, method `testSingleRecordWrite`.
@Test
public void testSingleRecordWrite() throws IOException {
// For every writer/record-type combination: index a single record, then
// fetch it back by id and verify it exists.
for (WriterVariant writerVariant : variants) {
for (RecordTypeGenerator recordVariant : recordGenerators) {
String indexName = "posts" + writerVariant.getName().toLowerCase();
String indexType = recordVariant.getName();
Config config = writerVariant.getConfigBuilder()
    .setIndexName(indexName)
    .setIndexType(indexType)
    .setTypeMapperClassName(recordVariant.getTypeMapperClassName())
    .setHttpPort(_esTestServer.getHttpPort())
    .setTransportPort(_esTestServer.getTransportPort())
    .build();
TestClient testClient = writerVariant.getTestClient(config);
// FIX: wrap everything after client creation in try/finally — previously
// testClient.close() lived only in the read-back finally, so an exception
// during recreateIndex/write/commit leaked the client connection.
try {
SequentialBasedBatchAccumulator<Object> batchAccumulator = new SequentialBasedBatchAccumulator<>(config);
BufferedAsyncDataWriter bufferedAsyncDataWriter = new BufferedAsyncDataWriter(batchAccumulator, writerVariant.getBatchAsyncDataWriter(config));
String id = TestUtils.generateRandomAlphaString(10);
Object testRecord = recordVariant.getRecord(id, PayloadType.STRING);
DataWriter writer = AsyncWriterManager.builder().failureAllowanceRatio(0.0).retriesEnabled(false).config(config).asyncDataWriter(bufferedAsyncDataWriter).build();
try {
testClient.recreateIndex(indexName);
writer.write(testRecord);
writer.commit();
} finally {
writer.close();
}
try {
GetResponse response = testClient.get(new GetRequest(indexName, indexType, id));
Assert.assertEquals(response.getId(), id, "Response id matches request");
// assertTrue instead of assertEquals(condition, true) — clearer idiom.
Assert.assertTrue(response.isExists(), "Document not found");
} catch (Exception e) {
Assert.fail("Failed to get a response", e);
}
} finally {
testClient.close();
}
}
}
}
Aggregations