Search in sources :

Example 1 with ElasticsearchInputProperties

use of org.talend.components.elasticsearch.input.ElasticsearchInputProperties in project components by Talend.

The class ElasticsearchDatasetRuntime, method getSample.

/**
 * Pushes a sample of this dataset's records to the given consumer.
 *
 * <p>Wraps the dataset properties in input properties, initializes an Elasticsearch input
 * runtime with them, and runs that runtime in a Beam pipeline created from the local runner
 * options. The call blocks until the pipeline has finished.
 *
 * @param limit the sample size handed to {@code Sample.any}
 * @param consumer the callback that receives the sampled records
 */
@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
    // The input runtime is configured through input properties that point at this dataset.
    ElasticsearchInputProperties sampleInput = new ElasticsearchInputProperties(null);
    sampleInput.init();
    sampleInput.setDatasetProperties(properties);

    ElasticsearchInputRuntime reader = new ElasticsearchInputRuntime();
    reader.initialize(null, sampleInput);

    DirectOptions runnerOptions = BeamLocalRunnerOption.getOptions();
    final Pipeline samplingPipeline = Pipeline.create(runnerOptions);
    try (DirectConsumerCollector<IndexedRecord> sink = DirectConsumerCollector.of(consumer)) {
        // Read -> sample -> hand the sampled records over to the consumer.
        samplingPipeline
                .apply(reader)
                .apply(Sample.<IndexedRecord>any(limit))
                .apply(sink);
        samplingPipeline.run().waitUntilFinish();
    }
}
Also used : ElasticsearchInputProperties(org.talend.components.elasticsearch.input.ElasticsearchInputProperties) IndexedRecord(org.apache.avro.generic.IndexedRecord) DirectOptions(org.apache.beam.runners.direct.DirectOptions) Pipeline(org.apache.beam.sdk.Pipeline)

Example 2 with ElasticsearchInputProperties

use of org.talend.components.elasticsearch.input.ElasticsearchInputProperties in project components by Talend.

The class ElasticsearchBeamRuntimeTestIT, method filterTest.

/**
 * Writes five records to Elasticsearch, reads them back through an input runtime configured
 * with a regexp query, and asserts that the output consists of the records "r1", "r2" and
 * "r3" (in any order).
 */
@Test
public void filterTest() throws MalformedURLException {
    final String typeName = "filtertest";
    List<String> allValues = Arrays.asList("r1", "r2", "r3", "q1", "q2");
    List<String> matchingValues = Arrays.asList("r1", "r2", "r3");

    // Records that get written to the index.
    List<IndexedRecord> recordsToWrite = new ArrayList<>(allValues.size());
    for (String value : allValues) {
        recordsToWrite.add(ConvertToIndexedRecord.convertToAvro(value));
    }
    // Records the filtered read is expected to return.
    List<IndexedRecord> expectedAvroRecords = new ArrayList<>(matchingValues.size());
    for (String value : matchingValues) {
        expectedAvroRecords.add(ConvertToIndexedRecord.convertToAvro(value));
    }

    // Dataset shared by the write and the read side.
    ElasticsearchDatasetProperties dataset = new ElasticsearchDatasetProperties("datasetProperties");
    dataset.init();
    dataset.setDatastoreProperties(datastoreProperties);
    dataset.index.setValue(INDEX_NAME);
    dataset.type.setValue(typeName);

    // Output side: write every record.
    ElasticsearchOutputProperties writeProperties = new ElasticsearchOutputProperties("outputProperties");
    writeProperties.init();
    writeProperties.setDatasetProperties(dataset);
    ElasticsearchOutputRuntime writer = new ElasticsearchOutputRuntime();
    writer.initialize(null, writeProperties);
    PCollection<IndexedRecord> toWrite = (PCollection<IndexedRecord>) pipeline
            .apply(Create.of(recordsToWrite).withCoder(LazyAvroCoder.of()));
    toWrite.apply(writer);
    // First run: only the write transforms are attached at this point.
    pipeline.run();
    // NOTE(review): presumably makes the written documents visible to searches - confirm
    // against ElasticsearchTestUtils.
    ElasticsearchTestUtils.upgradeIndexAndGetCurrentNumDocs(INDEX_NAME, typeName, client);

    // Input side: read back with a regexp query.
    ElasticsearchInputProperties readProperties = new ElasticsearchInputProperties("inputProperties");
    readProperties.init();
    readProperties.setDatasetProperties(dataset);
    readProperties.query.setValue("{\"query\":{\"regexp\":{\"field\":\"r[1-3]\"}}}");
    ElasticsearchInputRuntime reader = new ElasticsearchInputRuntime();
    reader.initialize(null, readProperties);
    PCollection<IndexedRecord> readBack = pipeline.apply(reader);
    PAssert.that(readBack).containsInAnyOrder(expectedAvroRecords);
    // Second run: the read and the assertion are now part of the pipeline.
    pipeline.run();
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) ElasticsearchInputProperties(org.talend.components.elasticsearch.input.ElasticsearchInputProperties) ConvertToIndexedRecord(org.talend.components.adapter.beam.transform.ConvertToIndexedRecord) IndexedRecord(org.apache.avro.generic.IndexedRecord) ArrayList(java.util.ArrayList) ElasticsearchDatasetProperties(org.talend.components.elasticsearch.ElasticsearchDatasetProperties) ElasticsearchOutputProperties(org.talend.components.elasticsearch.output.ElasticsearchOutputProperties) Test(org.junit.Test)

Example 3 with ElasticsearchInputProperties

use of org.talend.components.elasticsearch.input.ElasticsearchInputProperties in project components by Talend.

The class ElasticsearchBeamRuntimeTestIT, method basicTest.

/**
 * Writes three records to Elasticsearch, reads the dataset back without a query, and asserts
 * that the output consists of the same records (in any order).
 */
@Test
public void basicTest() throws MalformedURLException {
    final String typeName = "basictest";
    List<String> values = Arrays.asList("r1", "r2", "r3");
    List<IndexedRecord> recordsToWrite = new ArrayList<>(values.size());
    for (String value : values) {
        recordsToWrite.add(ConvertToIndexedRecord.convertToAvro(value));
    }

    // Dataset shared by the write and the read side.
    ElasticsearchDatasetProperties dataset = new ElasticsearchDatasetProperties("datasetProperties");
    dataset.init();
    dataset.setDatastoreProperties(datastoreProperties);
    dataset.index.setValue(INDEX_NAME);
    dataset.type.setValue(typeName);

    // Output side: write every record.
    ElasticsearchOutputProperties writeProperties = new ElasticsearchOutputProperties("outputProperties");
    writeProperties.init();
    writeProperties.setDatasetProperties(dataset);
    ElasticsearchOutputRuntime writer = new ElasticsearchOutputRuntime();
    writer.initialize(null, writeProperties);
    PCollection<IndexedRecord> toWrite = (PCollection<IndexedRecord>) pipeline
            .apply(Create.of(recordsToWrite).withCoder(LazyAvroCoder.of()));
    toWrite.apply(writer);
    // First run: only the write transforms are attached at this point.
    pipeline.run();
    // NOTE(review): presumably makes the written documents visible to searches - confirm
    // against ElasticsearchTestUtils.
    ElasticsearchTestUtils.upgradeIndexAndGetCurrentNumDocs(INDEX_NAME, typeName, client);

    // Input side: read the whole dataset back.
    ElasticsearchInputProperties readProperties = new ElasticsearchInputProperties("inputProperties");
    readProperties.init();
    readProperties.setDatasetProperties(dataset);
    ElasticsearchInputRuntime reader = new ElasticsearchInputRuntime();
    reader.initialize(null, readProperties);
    PCollection<IndexedRecord> readBack = pipeline.apply(reader);
    PAssert.that(readBack).containsInAnyOrder(recordsToWrite);
    // Second run: the read and the assertion are now part of the pipeline.
    pipeline.run();
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) ElasticsearchInputProperties(org.talend.components.elasticsearch.input.ElasticsearchInputProperties) ConvertToIndexedRecord(org.talend.components.adapter.beam.transform.ConvertToIndexedRecord) IndexedRecord(org.apache.avro.generic.IndexedRecord) ArrayList(java.util.ArrayList) ElasticsearchDatasetProperties(org.talend.components.elasticsearch.ElasticsearchDatasetProperties) ElasticsearchOutputProperties(org.talend.components.elasticsearch.output.ElasticsearchOutputProperties) Test(org.junit.Test)

Aggregations

IndexedRecord (org.apache.avro.generic.IndexedRecord): 3, ElasticsearchInputProperties (org.talend.components.elasticsearch.input.ElasticsearchInputProperties): 3, ArrayList (java.util.ArrayList): 2, PCollection (org.apache.beam.sdk.values.PCollection): 2, Test (org.junit.Test): 2, ConvertToIndexedRecord (org.talend.components.adapter.beam.transform.ConvertToIndexedRecord): 2, ElasticsearchDatasetProperties (org.talend.components.elasticsearch.ElasticsearchDatasetProperties): 2, ElasticsearchOutputProperties (org.talend.components.elasticsearch.output.ElasticsearchOutputProperties): 2, DirectOptions (org.apache.beam.runners.direct.DirectOptions): 1, Pipeline (org.apache.beam.sdk.Pipeline): 1