Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BoundedElasticsearchSource in the Apache Beam project.
From the class ElasticsearchIOTest, method testSizes.
/**
 * Checks the size estimate reported by a {@code BoundedElasticsearchSource}.
 *
 * <p>After populating the test index through {@code insertTestDocuments}, the estimate returned
 * by {@code getEstimatedSizeBytes} must be strictly greater than
 * {@code AVERAGE_DOC_SIZE * NUM_DOCS}.
 */
@Test
public void testSizes() throws Exception {
  ElasticSearchIOTestUtils.insertTestDocuments(ES_INDEX, ES_TYPE, NUM_DOCS, node.client());

  ElasticsearchIO.Read readSpec =
      ElasticsearchIO.read().withConnectionConfiguration(connectionConfiguration);
  BoundedElasticsearchSource source = new BoundedElasticsearchSource(readSpec, null);
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  // An exact-equality assertion is not possible here: two Elasticsearch indexes never end up
  // with the same size (a consequence of Elasticsearch internals), so only a lower bound is
  // checked.
  long sizeEstimate = source.getEstimatedSizeBytes(pipelineOptions);
  LOG.info("Estimated size: {}", sizeEstimate);
  assertThat(
      "Wrong estimated size",
      sizeEstimate,
      greaterThan(AVERAGE_DOC_SIZE * NUM_DOCS));
}
Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BoundedElasticsearchSource in the Apache Beam project.
From the class ElasticsearchIOTest, method testSplit.
/**
 * Checks how a {@code BoundedElasticsearchSource} is split into sub-sources.
 *
 * <p>The test asserts that:
 * <ul>
 *   <li>the splits taken together are equivalent to the initial source
 *       (via {@code SourceTestUtils.assertSourcesEqualReferenceSource});</li>
 *   <li>exactly {@code expectedNumSplits} splits are produced;</li>
 *   <li>every one of those splits yields at least one record when read.</li>
 * </ul>
 */
@Test
public void testSplit() throws Exception {
  ElasticSearchIOTestUtils.insertTestDocuments(ES_INDEX, ES_TYPE, NUM_DOCS, node.client());
  PipelineOptions options = PipelineOptionsFactory.create();
  ElasticsearchIO.Read read =
      ElasticsearchIO.read().withConnectionConfiguration(connectionConfiguration);
  BoundedElasticsearchSource initialSource = new BoundedElasticsearchSource(read, null);
  // desiredBundleSize is ignored because in ES 2.x there is no way to split shards. So we get
  // as many bundles as ES shards and bundle size is shard size.
  int desiredBundleSizeBytes = 0;
  List<? extends BoundedSource<String>> splits =
      initialSource.split(desiredBundleSizeBytes, options);
  SourceTestUtils.assertSourcesEqualReferenceSource(initialSource, splits, options);
  // This is the number of ES shards
  // (by default, each index in Elasticsearch is allocated 5 primary shards).
  int expectedNumSplits = 5;
  assertEquals(expectedNumSplits, splits.size());
  // Count the splits that actually return data; all of them are expected to.
  int nonEmptySplits = 0;
  for (BoundedSource<String> subSource : splits) {
    if (!readFromSource(subSource, options).isEmpty()) {
      nonEmptySplits += 1;
    }
  }
  // The failure message names the quantity being compared: the count of NON-empty splits.
  assertEquals("Wrong number of non empty splits", expectedNumSplits, nonEmptySplits);
}
Aggregations