Search in sources :

Example 1 with BatchAsyncDataWriter

use of org.apache.gobblin.writer.BatchAsyncDataWriter in project incubator-gobblin by apache.

In the class ElasticsearchDataWriterBuilder, the method build:

/**
 * Builds the Elasticsearch writer described by the destination properties.
 *
 * <p>The configured client type selects either an {@code ElasticsearchRestWriter} or an
 * {@code ElasticsearchTransportClientWriter}. The chosen writer is wrapped in a
 * {@code BufferedAsyncDataWriter} backed by a {@code SequentialBasedBatchAccumulator}, and that is
 * handed to an {@code AsyncWriterManager} configured with the failure allowance and retry settings
 * read from the same config.
 *
 * @return the {@code AsyncWriterManager} wrapping the selected Elasticsearch client writer
 * @throws IOException declared by the builder contract; nothing in this method's own body throws it
 *         explicitly
 * @throws IllegalArgumentException if the configured client type is not a known {@code ClientType}
 *         constant, or is a constant this builder has no writer for
 */
@Override
public DataWriter build() throws IOException {
    State state = this.destination.getProperties();
    Properties taskProps = state.getProperties();
    Config config = ConfigUtils.propertiesToConfig(taskProps);

    // Read every scalar setting before constructing a client writer, so that a malformed value
    // fails the build before any client object has been created.
    // The failure allowance is configured as a percentage; the manager expects a ratio.
    double failureAllowance = ConfigUtils.getDouble(config, ElasticsearchWriterConfigurationKeys.FAILURE_ALLOWANCE_PCT_CONFIG, ElasticsearchWriterConfigurationKeys.FAILURE_ALLOWANCE_PCT_DEFAULT) / 100.0;
    boolean retriesEnabled = ConfigUtils.getBoolean(config, ElasticsearchWriterConfigurationKeys.RETRIES_ENABLED, ElasticsearchWriterConfigurationKeys.RETRIES_ENABLED_DEFAULT);
    int maxRetries = ConfigUtils.getInt(config, ElasticsearchWriterConfigurationKeys.MAX_RETRIES, ElasticsearchWriterConfigurationKeys.MAX_RETRIES_DEFAULT);

    String clientTypeName = ConfigUtils.getString(config, ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_CLIENT_TYPE, ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_CLIENT_TYPE_DEFAULT);
    ElasticsearchWriterConfigurationKeys.ClientType clientType;
    try {
        // Locale.ROOT keeps the case conversion independent of the JVM's default locale.
        clientType = ElasticsearchWriterConfigurationKeys.ClientType.valueOf(clientTypeName.toUpperCase(java.util.Locale.ROOT));
    } catch (IllegalArgumentException e) {
        // valueOf rejects unknown names with a generic "No enum constant" message; surface the
        // same guidance the default branch below gives, and keep the original as the cause.
        throw new IllegalArgumentException("Need to specify which " + ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_CLIENT_TYPE + " client to use (rest/transport)", e);
    }

    SequentialBasedBatchAccumulator<JsonObject> batchAccumulator = new SequentialBasedBatchAccumulator<>(taskProps);
    BatchAsyncDataWriter asyncDataWriter;
    switch (clientType) {
        case REST:
            {
                asyncDataWriter = new ElasticsearchRestWriter(config);
                break;
            }
        case TRANSPORT:
            {
                asyncDataWriter = new ElasticsearchTransportClientWriter(config);
                break;
            }
        default:
            {
                // Reached only for a valid ClientType constant that has no writer mapped here.
                throw new IllegalArgumentException("Need to specify which " + ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_CLIENT_TYPE + " client to use (rest/transport)");
            }
    }
    BufferedAsyncDataWriter bufferedAsyncDataWriter = new BufferedAsyncDataWriter(batchAccumulator, asyncDataWriter);
    return AsyncWriterManager.builder().failureAllowanceRatio(failureAllowance).retriesEnabled(retriesEnabled).numRetries(maxRetries).config(config).asyncDataWriter(bufferedAsyncDataWriter).build();
}
Also used : Config(com.typesafe.config.Config) JsonObject(com.google.gson.JsonObject) BufferedAsyncDataWriter(org.apache.gobblin.writer.BufferedAsyncDataWriter) Properties(java.util.Properties) SequentialBasedBatchAccumulator(org.apache.gobblin.writer.SequentialBasedBatchAccumulator) State(org.apache.gobblin.configuration.State) BatchAsyncDataWriter(org.apache.gobblin.writer.BatchAsyncDataWriter)

Example 2 with BatchAsyncDataWriter

use of org.apache.gobblin.writer.BatchAsyncDataWriter in project incubator-gobblin by apache.

In the class ElasticsearchWriterIntegrationTest, the method testMalformedDocs:

/**
 * Sends two docs in a single batch with different field types.
 * Triggers Elasticsearch server to send back an exception due to malformed docs.
 *
 * <p>The test writes one record with a {@code LONG} payload and one with a {@code MAP} payload
 * under the same index and type, then checks two things: whether the write path threw, according
 * to the policy under test, and that exactly one of the two documents can be fetched afterwards.
 *
 * @param writerVariant supplies the config builder, the test client and the batch writer under test
 * @param recordVariant generates the test records and names the type mapper class
 * @param malformedDocPolicy policy under test; {@code FAIL} must make the write path throw, while
 *        {@code IGNORE} and {@code WARN} must not
 * @throws IOException if setting up the index or closing the test client fails
 */
public void testMalformedDocs(WriterVariant writerVariant, RecordTypeGenerator recordVariant, MalformedDocPolicy malformedDocPolicy) throws IOException {
    String indexName = writerVariant.getName().toLowerCase();
    String indexType = (recordVariant.getName() + malformedDocPolicy.name()).toLowerCase();
    Config config = writerVariant.getConfigBuilder().setIdMappingEnabled(true).setIndexName(indexName).setIndexType(indexType).setHttpPort(_esTestServer.getHttpPort()).setTransportPort(_esTestServer.getTransportPort()).setTypeMapperClassName(recordVariant.getTypeMapperClassName()).setMalformedDocPolicy(malformedDocPolicy).build();
    TestClient testClient = writerVariant.getTestClient(config);
    // Everything after the client is created runs under this try, so the client is closed even
    // when index setup, writer construction or an assertion fails.
    try {
        testClient.recreateIndex(indexName);
        String id1 = TestUtils.generateRandomAlphaString(10);
        String id2 = TestUtils.generateRandomAlphaString(10);
        Object testRecord1 = recordVariant.getRecord(id1, PayloadType.LONG);
        Object testRecord2 = recordVariant.getRecord(id2, PayloadType.MAP);
        SequentialBasedBatchAccumulator<Object> batchAccumulator = new SequentialBasedBatchAccumulator<>(config);
        BatchAsyncDataWriter elasticsearchWriter = writerVariant.getBatchAsyncDataWriter(config);
        BufferedAsyncDataWriter bufferedAsyncDataWriter = new BufferedAsyncDataWriter(batchAccumulator, elasticsearchWriter);
        DataWriter writer = AsyncWriterManager.builder().failureAllowanceRatio(0.0).retriesEnabled(false).config(config).asyncDataWriter(bufferedAsyncDataWriter).build();
        // Tracks whether close() has already been invoked, so the finally block never calls it twice.
        boolean closeAttempted = false;
        try {
            writer.write(testRecord1);
            writer.write(testRecord2);
            writer.commit();
            closeAttempted = true;
            writer.close();
            if (malformedDocPolicy == MalformedDocPolicy.FAIL) {
                Assert.fail("Should have thrown an exception if malformed doc policy was set to Fail");
            }
        } catch (Exception e) {
            switch(malformedDocPolicy) {
                case IGNORE:
                case WARN:
                    {
                        Assert.fail("Should not have failed if malformed doc policy was set to ignore or warn", e);
                        break;
                    }
                case FAIL:
                    {
                        // pass through
                        break;
                    }
                default:
                    {
                        throw new RuntimeException("This test does not handle this policyType : " + malformedDocPolicy.toString());
                    }
            }
        } finally {
            if (!closeAttempted) {
                try {
                    writer.close();
                } catch (Exception ignored) {
                    // Best-effort cleanup: write/commit already failed and that outcome has been
                    // judged above; a secondary failure while closing must not replace it.
                }
            }
        }
        // Irrespective of policy, first doc should be inserted and second doc should fail
        int docsIndexed = 0;
        try {
            for (String id : new String[] { id1, id2 }) {
                GetResponse response = testClient.get(new GetRequest(indexName, indexType, id));
                Assert.assertEquals(response.getId(), id, "Response id matches request");
                System.out.println(malformedDocPolicy + ":" + response.toString());
                if (response.isExists()) {
                    docsIndexed++;
                }
            }
            // only one doc should be found
            Assert.assertEquals(docsIndexed, 1, "Only one document should be indexed");
        } catch (Exception e) {
            Assert.fail("Failed to get a response", e);
        }
    } finally {
        testClient.close();
    }
}
Also used : Config(com.typesafe.config.Config) BufferedAsyncDataWriter(org.apache.gobblin.writer.BufferedAsyncDataWriter) SequentialBasedBatchAccumulator(org.apache.gobblin.writer.SequentialBasedBatchAccumulator) GetResponse(org.elasticsearch.action.get.GetResponse) IOException(java.io.IOException) GetRequest(org.elasticsearch.action.get.GetRequest) BatchAsyncDataWriter(org.apache.gobblin.writer.BatchAsyncDataWriter) DataWriter(org.apache.gobblin.writer.DataWriter)

Aggregations

Config (com.typesafe.config.Config): 2; BatchAsyncDataWriter (org.apache.gobblin.writer.BatchAsyncDataWriter): 2; BufferedAsyncDataWriter (org.apache.gobblin.writer.BufferedAsyncDataWriter): 2; SequentialBasedBatchAccumulator (org.apache.gobblin.writer.SequentialBasedBatchAccumulator): 2; JsonObject (com.google.gson.JsonObject): 1; IOException (java.io.IOException): 1; Properties (java.util.Properties): 1; State (org.apache.gobblin.configuration.State): 1; DataWriter (org.apache.gobblin.writer.DataWriter): 1; GetRequest (org.elasticsearch.action.get.GetRequest): 1; GetResponse (org.elasticsearch.action.get.GetResponse): 1