Use of org.apache.gobblin.writer.BatchAsyncDataWriter in project incubator-gobblin by apache:
class ElasticsearchDataWriterBuilder, method build.
@Override
public DataWriter build() throws IOException {
  State state = this.destination.getProperties();
  Properties taskProps = state.getProperties();
  Config config = ConfigUtils.propertiesToConfig(taskProps);

  // Records are collected into batches before being handed to the async writer.
  SequentialBasedBatchAccumulator<JsonObject> batchAccumulator = new SequentialBasedBatchAccumulator<>(taskProps);

  // Pick the Elasticsearch client implementation (REST or transport) from configuration.
  BatchAsyncDataWriter asyncDataWriter;
  switch (ElasticsearchWriterConfigurationKeys.ClientType.valueOf(
      ConfigUtils.getString(config,
          ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_CLIENT_TYPE,
          ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_CLIENT_TYPE_DEFAULT)
          .toUpperCase())) {
    case REST:
      asyncDataWriter = new ElasticsearchRestWriter(config);
      break;
    case TRANSPORT:
      asyncDataWriter = new ElasticsearchTransportClientWriter(config);
      break;
    default:
      throw new IllegalArgumentException("Need to specify which "
          + ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_CLIENT_TYPE
          + " client to use (rest/transport)");
  }

  BufferedAsyncDataWriter bufferedAsyncDataWriter = new BufferedAsyncDataWriter(batchAccumulator, asyncDataWriter);

  // Failure allowance is configured as a percentage; convert it to a ratio.
  double failureAllowance = ConfigUtils.getDouble(config,
      ElasticsearchWriterConfigurationKeys.FAILURE_ALLOWANCE_PCT_CONFIG,
      ElasticsearchWriterConfigurationKeys.FAILURE_ALLOWANCE_PCT_DEFAULT) / 100.0;
  boolean retriesEnabled = ConfigUtils.getBoolean(config,
      ElasticsearchWriterConfigurationKeys.RETRIES_ENABLED,
      ElasticsearchWriterConfigurationKeys.RETRIES_ENABLED_DEFAULT);
  int maxRetries = ConfigUtils.getInt(config,
      ElasticsearchWriterConfigurationKeys.MAX_RETRIES,
      ElasticsearchWriterConfigurationKeys.MAX_RETRIES_DEFAULT);

  // AsyncWriterManager wraps the buffered async pipeline in a synchronous DataWriter facade.
  return AsyncWriterManager.builder()
      .failureAllowanceRatio(failureAllowance)
      .retriesEnabled(retriesEnabled)
      .numRetries(maxRetries)
      .config(config)
      .asyncDataWriter(bufferedAsyncDataWriter)
      .build();
}
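For context, a minimal sketch of the configuration plumbing the switch above consumes: it sets the client-type key through the same constant the builder reads, then resolves it the way build() does. The enclosing class, the main method, and the assumed package of ElasticsearchWriterConfigurationKeys are illustrative, not part of the upstream code.

import java.util.Properties;

import com.typesafe.config.Config;

import org.apache.gobblin.util.ConfigUtils;
// Package assumed from the gobblin-elasticsearch module layout.
import org.apache.gobblin.elasticsearch.writer.ElasticsearchWriterConfigurationKeys;

public class ClientTypeSelectionSketch {
  public static void main(String[] args) {
    Properties taskProps = new Properties();
    // Writing and reading through the same key constant avoids hard-coding the literal key string.
    taskProps.setProperty(ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_CLIENT_TYPE, "rest");
    Config config = ConfigUtils.propertiesToConfig(taskProps);
    // build() resolves the value case-insensitively ("rest" -> ClientType.REST),
    // so this configuration selects the ElasticsearchRestWriter branch of the switch.
    ElasticsearchWriterConfigurationKeys.ClientType type =
        ElasticsearchWriterConfigurationKeys.ClientType.valueOf(
            ConfigUtils.getString(config,
                ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_CLIENT_TYPE,
                ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_CLIENT_TYPE_DEFAULT)
                .toUpperCase());
    System.out.println("Selected client type: " + type);
  }
}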
Use of org.apache.gobblin.writer.BatchAsyncDataWriter in project incubator-gobblin by apache:
class ElasticsearchWriterIntegrationTest, method testMalformedDocs.
/**
 * Sends two docs in a single batch whose shared field has different types,
 * triggering the Elasticsearch server to reject the second doc as malformed.
 * @throws IOException
 */
public void testMalformedDocs(WriterVariant writerVariant, RecordTypeGenerator recordVariant,
    MalformedDocPolicy malformedDocPolicy) throws IOException {
  String indexName = writerVariant.getName().toLowerCase();
  String indexType = (recordVariant.getName() + malformedDocPolicy.name()).toLowerCase();
  Config config = writerVariant.getConfigBuilder()
      .setIdMappingEnabled(true)
      .setIndexName(indexName)
      .setIndexType(indexType)
      .setHttpPort(_esTestServer.getHttpPort())
      .setTransportPort(_esTestServer.getTransportPort())
      .setTypeMapperClassName(recordVariant.getTypeMapperClassName())
      .setMalformedDocPolicy(malformedDocPolicy)
      .build();
  TestClient testClient = writerVariant.getTestClient(config);
  testClient.recreateIndex(indexName);

  String id1 = TestUtils.generateRandomAlphaString(10);
  String id2 = TestUtils.generateRandomAlphaString(10);
  // The payload field is a long in the first doc and a map in the second, so the
  // second doc conflicts with the index mapping established by the first.
  Object testRecord1 = recordVariant.getRecord(id1, PayloadType.LONG);
  Object testRecord2 = recordVariant.getRecord(id2, PayloadType.MAP);

  SequentialBasedBatchAccumulator<Object> batchAccumulator = new SequentialBasedBatchAccumulator<>(config);
  BatchAsyncDataWriter elasticsearchWriter = writerVariant.getBatchAsyncDataWriter(config);
  BufferedAsyncDataWriter bufferedAsyncDataWriter = new BufferedAsyncDataWriter(batchAccumulator, elasticsearchWriter);
  DataWriter writer = AsyncWriterManager.builder()
      .failureAllowanceRatio(0.0)
      .retriesEnabled(false)
      .config(config)
      .asyncDataWriter(bufferedAsyncDataWriter)
      .build();

  try {
    writer.write(testRecord1);
    writer.write(testRecord2);
    writer.commit();
    writer.close();
    if (malformedDocPolicy == MalformedDocPolicy.FAIL) {
      Assert.fail("Should have thrown an exception if malformed doc policy was set to FAIL");
    }
  } catch (Exception e) {
    switch (malformedDocPolicy) {
      case IGNORE:
      case WARN:
        Assert.fail("Should not have failed if malformed doc policy was set to IGNORE or WARN", e);
        break;
      case FAIL:
        // Expected: commit propagates the malformed-doc failure.
        break;
      default:
        throw new RuntimeException("This test does not handle this policyType : " + malformedDocPolicy.toString());
    }
  }

  // Irrespective of policy, the first doc should be indexed and the second should fail.
  int docsIndexed = 0;
  try {
    {
      GetResponse response = testClient.get(new GetRequest(indexName, indexType, id1));
      Assert.assertEquals(response.getId(), id1, "Response id matches request");
      System.out.println(malformedDocPolicy + ":" + response.toString());
      if (response.isExists()) {
        docsIndexed++;
      }
    }
    {
      GetResponse response = testClient.get(new GetRequest(indexName, indexType, id2));
      Assert.assertEquals(response.getId(), id2, "Response id matches request");
      System.out.println(malformedDocPolicy + ":" + response.toString());
      if (response.isExists()) {
        docsIndexed++;
      }
    }
    Assert.assertEquals(docsIndexed, 1, "Only one document should be indexed");
  } catch (Exception e) {
    Assert.fail("Failed to get a response", e);
  } finally {
    testClient.close();
  }
}
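The method takes its writer variant, record generator, and malformed-doc policy as parameters, so upstream it is presumably driven by a TestNG data provider. A minimal sketch of such wiring, assuming TestNG and hypothetical writerVariant/recordVariant fixtures; the provider name, driver method, and fields are illustrative, not the upstream test's own wiring.

import java.io.IOException;

import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

// Enumerates one test case per MalformedDocPolicy value.
@DataProvider(name = "malformedDocCases")
public Object[][] malformedDocCases() {
  Object[][] cases = new Object[MalformedDocPolicy.values().length][];
  int i = 0;
  for (MalformedDocPolicy policy : MalformedDocPolicy.values()) {
    // writerVariant and recordVariant would be fixtures on the test class.
    cases[i++] = new Object[] { this.writerVariant, this.recordVariant, policy };
  }
  return cases;
}

@Test(dataProvider = "malformedDocCases")
public void testMalformedDocsDriver(WriterVariant writerVariant, RecordTypeGenerator recordVariant,
    MalformedDocPolicy malformedDocPolicy) throws IOException {
  testMalformedDocs(writerVariant, recordVariant, malformedDocPolicy);
}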