Use of org.apache.beam.sdk.values.PCollection in project components by Talend.
The class ElasticsearchBeamRuntimeTestIT, method filterTest.
/**
 * Indexes five string records through the Elasticsearch output runtime, then reads them back
 * with a regexp query ({@code r[1-3]}) through the input runtime and asserts that exactly the
 * three {@code r*} records come back, in any order.
 */
@Test
public void filterTest() throws MalformedURLException {
    final String TYPE_NAME = "filtertest";

    // Values to index, and the subset the regexp query below is expected to return.
    List<String> indexedValues = Arrays.asList("r1", "r2", "r3", "q1", "q2");
    List<String> matchingValues = Arrays.asList("r1", "r2", "r3");

    List<IndexedRecord> expectedAvro = new ArrayList<>();
    for (String value : matchingValues) {
        IndexedRecord converted = ConvertToIndexedRecord.convertToAvro(value);
        expectedAvro.add(converted);
    }

    List<IndexedRecord> avroToWrite = new ArrayList<>();
    for (String value : indexedValues) {
        IndexedRecord converted = ConvertToIndexedRecord.convertToAvro(value);
        avroToWrite.add(converted);
    }

    // Dataset shared by the write and the read side: same datastore, index and type.
    ElasticsearchDatasetProperties dataset = new ElasticsearchDatasetProperties("datasetProperties");
    dataset.init();
    dataset.setDatastoreProperties(datastoreProperties);
    dataset.index.setValue(INDEX_NAME);
    dataset.type.setValue(TYPE_NAME);

    // ---- write side ----
    ElasticsearchOutputProperties writeProps = new ElasticsearchOutputProperties("outputProperties");
    writeProps.init();
    writeProps.setDatasetProperties(dataset);

    ElasticsearchOutputRuntime writer = new ElasticsearchOutputRuntime();
    writer.initialize(null, writeProps);

    PCollection<IndexedRecord> toWrite = (PCollection<IndexedRecord>) pipeline.apply(Create.of(avroToWrite).withCoder(LazyAvroCoder.of()));
    toWrite.apply(writer);
    pipeline.run();

    // Presumably makes the freshly written documents visible to searches; the returned
    // document count is not used here. TODO confirm against ElasticsearchTestUtils.
    ElasticsearchTestUtils.upgradeIndexAndGetCurrentNumDocs(INDEX_NAME, TYPE_NAME, client);

    // ---- read side ----
    ElasticsearchInputProperties readProps = new ElasticsearchInputProperties("inputProperties");
    readProps.init();
    readProps.setDatasetProperties(dataset);
    readProps.query.setValue("{\"query\":{\"regexp\":{\"field\":\"r[1-3]\"}}}");

    ElasticsearchInputRuntime reader = new ElasticsearchInputRuntime();
    reader.initialize(null, readProps);

    PCollection<IndexedRecord> readBack = pipeline.apply(reader);
    PAssert.that(readBack).containsInAnyOrder(expectedAvro);
    // NOTE(review): this is the same pipeline object that was already run above, so the
    // write steps are still attached to it — confirm that re-running them is intended.
    pipeline.run();
}
Use of org.apache.beam.sdk.values.PCollection in project components by Talend.
The class ElasticsearchBeamRuntimeTestIT, method getSampleTest.
/**
 * Indexes three string records through the Elasticsearch output runtime, then asks the dataset
 * runtime for a sample of three and checks that the sample matches what was written.
 */
@Test
public void getSampleTest() {
    final String TYPE_NAME = "getsampletest";

    List<String> indexedValues = Arrays.asList("r1", "r2", "r3");
    List<IndexedRecord> avroToWrite = new ArrayList<>();
    for (String value : indexedValues) {
        IndexedRecord converted = ConvertToIndexedRecord.convertToAvro(value);
        avroToWrite.add(converted);
    }

    // Dataset used both for writing and for sampling.
    ElasticsearchDatasetProperties dataset = new ElasticsearchDatasetProperties("datasetProperties");
    dataset.init();
    dataset.setDatastoreProperties(datastoreProperties);
    dataset.index.setValue(INDEX_NAME);
    dataset.type.setValue(TYPE_NAME);

    // ---- write side ----
    ElasticsearchOutputProperties writeProps = new ElasticsearchOutputProperties("outputProperties");
    writeProps.init();
    writeProps.setDatasetProperties(dataset);

    ElasticsearchOutputRuntime writer = new ElasticsearchOutputRuntime();
    writer.initialize(null, writeProps);

    PCollection<IndexedRecord> toWrite = (PCollection<IndexedRecord>) pipeline.apply(Create.of(avroToWrite).withCoder(LazyAvroCoder.of()));
    toWrite.apply(writer);
    pipeline.run();

    // Presumably makes the freshly written documents visible to reads; the returned
    // document count is not used here. TODO confirm against ElasticsearchTestUtils.
    ElasticsearchTestUtils.upgradeIndexAndGetCurrentNumDocs(INDEX_NAME, TYPE_NAME, client);

    // ---- sampling ----
    ElasticsearchDatasetRuntime sampler = new ElasticsearchDatasetRuntime();
    sampler.initialize(null, dataset);

    // Collects every record the dataset runtime hands to the consumer.
    final List<IndexedRecord> collected = new ArrayList<>();
    Consumer<IndexedRecord> collector = new Consumer<IndexedRecord>() {

        @Override
        public void accept(IndexedRecord sample) {
            collected.add(sample);
        }
    };
    sampler.getSample(3, collector);

    compareListIndexedRecord(collected, avroToWrite);
    assertThat(collected.size(), is(3));
}
Use of org.apache.beam.sdk.values.PCollection in project component-runtime by Talend.
The class BeamProcessorChainImpl, method extractDoFn.
/**
 * Applies the transform held by {@code step} to a fresh {@link CapturingPipeline} and returns
 * whatever a {@code CapturingPipeline.SinkExtractor} collects while traversing that pipeline.
 *
 * <p>The transform is fed from a synthetic bounded, globally windowed source coded with
 * {@code TypingCoder.INSTANCE}. If the step carries coders, they are set on the transform's
 * output: per tag for a {@link PCollectionTuple}, or the first coder for a single
 * {@link PCollection}.
 *
 * @param step the captured transform together with its optional coders
 * @param coderRegistry registry to install on the capturing pipeline; ignored when {@code null}
 * @return the outputs reported by the sink extractor
 */
private static Collection<DoFn<?, ?>> extractDoFn(final CapturingPipeline.TransformWithCoder step, final CoderRegistry coderRegistry) {
    final CapturingPipeline scratchPipeline = new CapturingPipeline(PipelineOptionsFactory.create());
    if (coderRegistry != null) {
        scratchPipeline.setCoderRegistry(coderRegistry);
    }

    // Synthetic root: only exists to give the step's transform an input collection.
    final PTransform<PBegin, PCollection<Object>> syntheticSource = new PTransform<PBegin, PCollection<Object>>() {

        @Override
        public PCollection<Object> expand(final PBegin input) {
            return PCollection.createPrimitiveOutputInternal(scratchPipeline, WindowingStrategy.globalDefault(), PCollection.IsBounded.BOUNDED, TypingCoder.INSTANCE);
        }

        @Override
        protected Coder<?> getDefaultOutputCoder() {
            return TypingCoder.INSTANCE;
        }
    };
    final POutput stepOutput = scratchPipeline.apply(syntheticSource).apply(step.getTransform());

    if (stepOutput instanceof PCollectionTuple && step.getCoders() != null) {
        // Multi-output case: match each configured coder to the collection with the same tag.
        final Map<TupleTag<?>, PCollection<?>> collectionsByTag = ((PCollectionTuple) stepOutput).getAll();
        step.getCoders().forEach((tag, coder) -> {
            final PCollection<?> tagged = collectionsByTag.get(tag);
            if (tagged != null) {
                tagged.setCoder(Coder.class.cast(coder));
            }
        });
    } else if (stepOutput instanceof PCollection && step.getCoders() != null && !step.getCoders().isEmpty()) {
        // Single-output case: only the first configured coder is used.
        PCollection.class.cast(stepOutput).setCoder(Coder.class.cast(step.getCoders().values().iterator().next()));
    }

    final CapturingPipeline.SinkExtractor extractor = new CapturingPipeline.SinkExtractor();
    scratchPipeline.traverseTopologically(extractor);
    return extractor.getOutputs();
}
Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
The class TransformHierarchyTest, method replaceSucceeds.
/**
 * Checks that {@code replaceNode} followed by {@code replaceOutputs} yields a replacement node
 * that has the same inputs and enclosing node as the original, carries the replacement
 * transform, and whose outputs pair the replacement's tag with the original output value.
 */
@Test
public void replaceSucceeds() {
// A transform whose expand simply returns PDone; it is only used as the enclosing node.
PTransform<?, ?> enclosingPT = new PTransform<PInput, POutput>() {
@Override
public POutput expand(PInput input) {
return PDone.in(input.getPipeline());
}
};
// Push the enclosing node first, then the original Create node while it is still current.
TransformHierarchy.Node enclosing = hierarchy.pushNode("Enclosing", PBegin.in(pipeline), enclosingPT);
Create.Values<Long> originalTransform = Create.of(1L);
TransformHierarchy.Node original = hierarchy.pushNode("Create", PBegin.in(pipeline), originalTransform);
assertThat(hierarchy.getCurrent(), equalTo(original));
// Give the original node its output and pop it; it must then be marked as finished.
PCollection<Long> originalOutput = pipeline.apply(originalTransform);
hierarchy.setOutput(originalOutput);
hierarchy.popNode();
assertThat(original.finishedSpecifying, is(true));
// Close the enclosing node as well; afterwards it is no longer the current node.
hierarchy.setOutput(PDone.in(pipeline));
hierarchy.popNode();
assertThat(hierarchy.getCurrent(), not(equalTo(enclosing)));
// Build the replacement transform and swap it in for the original node.
Read.Bounded<Long> replacementTransform = Read.from(CountingSource.upTo(1L));
PCollection<Long> replacementOutput = pipeline.apply(replacementTransform);
Node replacement = hierarchy.replaceNode(original, PBegin.in(pipeline), replacementTransform);
assertThat(hierarchy.getCurrent(), equalTo(replacement));
hierarchy.setOutput(replacementOutput);
// Map the replacement's output onto the original output so replaceOutputs can rewire it.
TaggedPValue taggedReplacement = TaggedPValue.ofExpandedValue(replacementOutput);
Map<PCollection<?>, ReplacementOutput> replacementOutputs = Collections.singletonMap(replacementOutput, ReplacementOutput.of(TaggedPValue.ofExpandedValue(originalOutput), taggedReplacement));
hierarchy.replaceOutputs(replacementOutputs);
assertThat(replacement.getInputs(), equalTo(original.getInputs()));
assertThat(replacement.getEnclosingNode(), equalTo(original.getEnclosingNode()));
assertThat(replacement.getEnclosingNode(), equalTo(enclosing));
assertThat(replacement.getTransform(), equalTo(replacementTransform));
// The tags of the replacement transform are matched to the appropriate PValues of the original
assertThat(replacement.getOutputs().keySet(), Matchers.contains(taggedReplacement.getTag()));
assertThat(replacement.getOutputs().values(), Matchers.contains(originalOutput));
hierarchy.popNode();
}
Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
The class TransformHierarchyTest, method emptyCompositeSucceeds.
/**
 * Registers a node that outputs a collection, then a second node whose transform just returns
 * that same collection, and checks that the first node remains the collection's producer while
 * the second node is reported as composite.
 */
@Test
public void emptyCompositeSucceeds() {
    PCollection<Long> sharedOutput = PCollection.createPrimitiveOutputInternal(pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarLongCoder.of());

    // First node: the one that is expected to be recorded as the producer.
    TransformHierarchy.Node producerNode = hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1));
    hierarchy.setOutput(sharedOutput);
    hierarchy.popNode();

    // Second node: hands back its first input unchanged, so it produces nothing new.
    PCollectionList<Long> singletonList = PCollectionList.of(sharedOutput);
    PTransform<PCollectionList<Long>, PCollection<Long>> passThrough = new PTransform<PCollectionList<Long>, PCollection<Long>>() {

        @Override
        public PCollection<Long> expand(PCollectionList<Long> input) {
            return input.get(0);
        }
    };
    TransformHierarchy.Node passThroughNode = hierarchy.pushNode("Extract", singletonList, passThrough);
    hierarchy.setOutput(sharedOutput);
    hierarchy.popNode();

    assertThat(hierarchy.getProducer(sharedOutput), equalTo(producerNode));
    assertThat("A Transform that produces non-primitive output should be composite", passThroughNode.isCompositeNode(), is(true));
}
Aggregations