use of org.apache.beam.sdk.values.KV in project beam by apache.
the class HIFIOElasticIT method testHifIOWithElasticQuery.
/**
 * Reads from the Elasticsearch instance using a match query on the {@code Title} field and
 * verifies both the number of records read and a combined checksum of their values.
 */
@Test
public void testHifIOWithElasticQuery() {
  Long expectedCount = 1L;
  String expectedChecksum = "d7a7e4e42c2ca7b83ef7c1ad1ebce000";

  // Match query restricting the read to documents whose Title matches "Title9".
  String matchTitleQuery =
      "{"
          + " \"query\": {"
          + " \"match\" : {"
          + " \"Title\" : {"
          + " \"query\" : \"Title9\","
          + " \"type\" : \"boolean\""
          + " }"
          + " }"
          + " }"
          + "}";
  Configuration queryConf = getConfiguration(options);
  queryConf.set(ConfigurationOptions.ES_QUERY, matchTitleQuery);

  PCollection<KV<Text, LinkedMapWritable>> records =
      pipeline.apply(
          HadoopInputFormatIO.<Text, LinkedMapWritable>read().withConfiguration(queryConf));

  // The number of records fetched through HadoopInputFormatIO must match the expectation.
  PCollection<Long> recordCount =
      records.apply(Count.<KV<Text, LinkedMapWritable>>globally());
  PAssert.thatSingleton(recordCount).isEqualTo(expectedCount);

  // Reduce every value to text, then fold all of them into one hash for comparison.
  PCollection<String> valuesAsText =
      records.apply(Values.<LinkedMapWritable>create()).apply(transformFunc);
  PCollection<String> combinedChecksum =
      valuesAsText.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(combinedChecksum).containsInAnyOrder(expectedChecksum);

  pipeline.run().waitUntilFinish();
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class JdbcIOTest method testWrite.
/**
 * Writes 1000 key/value elements through {@code JdbcIO.write()} into the table returned by
 * {@code JdbcTestDataSet.createWriteDataTable} and then checks the table's row count with a
 * {@code select count(*)} query. The table is cleaned up afterwards even if the test fails.
 */
@Test
@Category(NeedsRunner.class)
public void testWrite() throws Exception {
  String tableName = JdbcTestDataSet.createWriteDataTable(dataSource);
  try {
    ArrayList<KV<Integer, String>> rows = new ArrayList<>();
    for (int key = 0; key < 1000; key++) {
      rows.add(KV.of(key, "Test"));
    }

    String insertSql = String.format("insert into %s values(?, ?)", tableName);
    // Binds the KV key to the first column and the KV value to the second.
    JdbcIO.PreparedStatementSetter<KV<Integer, String>> rowBinder =
        new JdbcIO.PreparedStatementSetter<KV<Integer, String>>() {
          @Override
          public void setParameters(KV<Integer, String> element, PreparedStatement statement)
              throws Exception {
            statement.setInt(1, element.getKey());
            statement.setString(2, element.getValue());
          }
        };

    pipeline
        .apply(Create.of(rows))
        .apply(
            JdbcIO.<KV<Integer, String>>write()
                .withDataSourceConfiguration(
                    JdbcIO.DataSourceConfiguration.create(
                        "org.apache.derby.jdbc.ClientDriver",
                        "jdbc:derby://localhost:" + port + "/target/beam"))
                .withStatement(insertSql)
                .withPreparedStatementSetter(rowBinder));
    pipeline.run();

    try (Connection conn = dataSource.getConnection();
        Statement countStatement = conn.createStatement();
        ResultSet countResult =
            countStatement.executeQuery("select count(*) from " + tableName)) {
      countResult.next();
      int rowCount = countResult.getInt(1);
      // NOTE(review): 1000 elements are handed to the pipeline above but 2000 rows are
      // expected here - confirm whether the table is pre-populated or rows are written twice.
      Assert.assertEquals(2000, rowCount);
    }
  } finally {
    JdbcTestDataSet.cleanUpDataTable(dataSource, tableName);
  }
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class DataflowPipelineTranslatorTest method testBatchStatefulParDoTranslation.
/**
 * Smoke test that fails fast if batch translation of a stateful ParDo breaks.
 *
 * <p>Builds a one-element pipeline with a DoFn that declares (but never uses) a state cell,
 * translates it, and checks the kinds of the resulting job steps.
 */
@Test
public void testBatchStatefulParDoTranslation() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};

  // The state declaration alone makes this DoFn stateful; the body does nothing.
  DoFn<KV<Integer, Integer>, Integer> statefulFn =
      new DoFn<KV<Integer, Integer>, Integer>() {
        @StateId("unused")
        final StateSpec<ValueState<Integer>> stateSpec = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void process(ProcessContext c) {
          // noop
        }
      };
  pipeline
      .apply(Create.of(KV.of(1, 1)))
      .apply(ParDo.of(statefulFn).withOutputTags(mainOutputTag, TupleTagList.empty()));

  runner.replaceTransforms(pipeline);
  Job job =
      translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();

  // The job should look like:
  // 0. ParallelRead (Create)
  // 1. ParDo(ReifyWVs)
  // 2. GroupByKeyAndSortValuesOnly
  // 3. A ParDo over grouped and sorted KVs that is executed via ungrouping service-side
  String[] expectedKinds = {"ParallelRead", "ParallelDo", "GroupByKey", "ParallelDo"};
  List<Step> translatedSteps = job.getSteps();
  assertEquals(expectedKinds.length, translatedSteps.size());
  for (int i = 0; i < expectedKinds.length; i++) {
    assertEquals(expectedKinds[i], translatedSteps.get(i).getKind());
  }

  // The final step must not be flagged as using keyed state.
  Step statefulParDoStep = translatedSteps.get(3);
  String usesKeyedState =
      (String) statefulParDoStep.getProperties().get(PropertyNames.USES_KEYED_STATE);
  assertThat(usesKeyedState, not(equalTo("true")));
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class BoundedReadFromUnboundedSourceTest method testForwardsDisplayData.
/**
 * Builds a bounded read from a counting source that registers a {@code foo}/{@code bar} display
 * item, and asserts via {@code includesDisplayDataFor("source", ...)} that the read's display
 * data includes that of the source.
 */
@Test
public void testForwardsDisplayData() {
  TestCountingSource countingSource =
      new TestCountingSource(1234) {
        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          builder.add(DisplayData.item("foo", "bar"));
        }
      };
  BoundedReadFromUnboundedSource<KV<Integer, Integer>> boundedRead =
      Read.from(countingSource).withMaxNumRecords(5);

  assertThat(DisplayData.from(boundedRead), includesDisplayDataFor("source", countingSource));
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class AutoCompleteTest method testTinyAutoComplete.
/**
 * Runs {@code ComputeTopCompletions} with a candidate limit of 2 over a six-word input and
 * checks the completions produced for the prefixes {@code x}, {@code xy} and {@code xyz}.
 */
@Test
public void testTinyAutoComplete() {
  List<String> corpus = Arrays.asList("x", "x", "x", "xy", "xy", "xyz");
  PCollection<String> wordCollection = p.apply(Create.of(corpus));

  PCollection<KV<String, List<CompletionCandidate>>> completions =
      wordCollection.apply(new ComputeTopCompletions(2, recursive));

  PAssert.that(completions)
      .containsInAnyOrder(
          KV.of("x", parseList("x:3", "xy:2")),
          KV.of("xy", parseList("xy:2", "xyz:1")),
          KV.of("xyz", parseList("xyz:1")));
  p.run().waitUntilFinish();
}
Aggregations