use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class TransformHierarchyTest method visitAfterReplace.
/**
* Tests that visiting the {@link TransformHierarchy} after replacing nodes does not visit any of
* the original nodes or inaccessible values but does visit all of the replacement nodes, new
* inaccessible replacement values, and the original output values.
*/
@Test
public void visitAfterReplace() {
Node root = hierarchy.getCurrent();
final SingleOutput<Long, Long> originalParDo = ParDo.of(new DoFn<Long, Long>() {
@ProcessElement
public void processElement(ProcessContext ctxt) {
ctxt.output(ctxt.element() + 1L);
}
});
GenerateSequence genUpstream = GenerateSequence.from(0);
PCollection<Long> upstream = pipeline.apply(genUpstream);
PCollection<Long> output = upstream.apply("Original", originalParDo);
Node upstreamNode = hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream);
hierarchy.finishSpecifyingInput();
hierarchy.setOutput(upstream);
hierarchy.popNode();
Node original = hierarchy.pushNode("Original", upstream, originalParDo);
hierarchy.finishSpecifyingInput();
hierarchy.setOutput(output);
hierarchy.popNode();
final TupleTag<Long> longs = new TupleTag<>();
final MultiOutput<Long, Long> replacementParDo = ParDo.of(new DoFn<Long, Long>() {
@ProcessElement
public void processElement(ProcessContext ctxt) {
ctxt.output(ctxt.element() + 1L);
}
}).withOutputTags(longs, TupleTagList.empty());
PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() {
@Override
public PCollection<Long> expand(PCollection<Long> input) {
return input.apply("Contained", replacementParDo).get(longs);
}
};
PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo);
Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite);
Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo);
hierarchy.finishSpecifyingInput();
hierarchy.setOutput(replacementOutput);
hierarchy.popNode();
hierarchy.setOutput(replacementOutput.get(longs));
Map<TupleTag<?>, PCollection<?>> expandedReplacementOutput = (Map) replacementOutput.expand();
Entry<TupleTag<?>, PCollection<?>> replacementLongs = Iterables.getOnlyElement(expandedReplacementOutput.entrySet());
hierarchy.replaceOutputs(Collections.singletonMap(replacementOutput.get(longs), ReplacementOutput.of(TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue()))));
hierarchy.popNode();
final Set<Node> visitedCompositeNodes = new HashSet<>();
final Set<Node> visitedPrimitiveNodes = new HashSet<>();
Set<PValue> visitedValues = hierarchy.visit(new Defaults() {
@Override
public CompositeBehavior enterCompositeTransform(Node node) {
visitedCompositeNodes.add(node);
return CompositeBehavior.ENTER_TRANSFORM;
}
@Override
public void visitPrimitiveTransform(Node node) {
visitedPrimitiveNodes.add(node);
}
});
/*
Final Graph:
Upstream -> Upstream.out -> Composite -> (ReplacementParDo -> OriginalParDo.out)
*/
assertThat(visitedCompositeNodes, containsInAnyOrder(root, compositeNode));
assertThat(visitedPrimitiveNodes, containsInAnyOrder(upstreamNode, replacementParNode));
assertThat(visitedValues, containsInAnyOrder(upstream, output));
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class JdbcIOTest method testReadRowsWithNumericFieldsWithExcessPrecision.
@Test
public void testReadRowsWithNumericFieldsWithExcessPrecision() {
PCollection<Row> rows = pipeline.apply(JdbcIO.readRows().withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION).withQuery(String.format("SELECT CAST(1 AS NUMERIC(10, 2)) AS T1 FROM %s WHERE name = ?", READ_TABLE_NAME)).withStatementPreparator(preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
Schema expectedSchema = Schema.of(Schema.Field.of("T1", FieldType.logicalType(FixedPrecisionNumeric.of(NUMERIC.getName(), 10, 2)).withNullable(false)));
assertEquals(expectedSchema, rows.getSchema());
PCollection<Row> output = rows.apply(Select.fieldNames("T1"));
PAssert.that(output).containsInAnyOrder(ImmutableList.of(Row.withSchema(expectedSchema).addValues(BigDecimal.valueOf(1).setScale(2, RoundingMode.HALF_UP)).build()));
pipeline.run();
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class JdbcIOTest method testWriteWithoutPreparedStatementWithReadRows.
@Test
public void testWriteWithoutPreparedStatementWithReadRows() throws Exception {
SerializableFunction<Void, DataSource> dataSourceProvider = ignored -> DATA_SOURCE;
PCollection<Row> rows = pipeline.apply(JdbcIO.readRows().withDataSourceProviderFn(dataSourceProvider).withQuery(String.format("select name,id from %s where name = ?", READ_TABLE_NAME)).withStatementPreparator(preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
String writeTableName = DatabaseTestHelper.getTestTableName("UT_WRITE_PS_WITH_READ_ROWS");
DatabaseTestHelper.createTable(DATA_SOURCE, writeTableName);
try {
rows.apply(JdbcIO.<Row>write().withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION).withBatchSize(10L).withTable(writeTableName));
pipeline.run();
} finally {
DatabaseTestHelper.deleteTable(DATA_SOURCE, writeTableName);
}
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class JdbcIOTest method testReadWithSchema.
@Test
public void testReadWithSchema() {
SerializableFunction<Void, DataSource> dataSourceProvider = ignored -> DATA_SOURCE;
JdbcIO.RowMapper<RowWithSchema> rowMapper = rs -> new RowWithSchema(rs.getString("NAME"), rs.getInt("ID"));
pipeline.getSchemaRegistry().registerJavaBean(RowWithSchema.class);
PCollection<RowWithSchema> rows = pipeline.apply(JdbcIO.<RowWithSchema>read().withDataSourceProviderFn(dataSourceProvider).withQuery(String.format("select name,id from %s where name = ?", READ_TABLE_NAME)).withRowMapper(rowMapper).withCoder(SerializableCoder.of(RowWithSchema.class)).withStatementPreparator(preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
Schema expectedSchema = Schema.of(Schema.Field.of("name", Schema.FieldType.STRING), Schema.Field.of("id", Schema.FieldType.INT32));
assertEquals(expectedSchema, rows.getSchema());
PCollection<Row> output = rows.apply(Select.fieldNames("name", "id"));
PAssert.that(output).containsInAnyOrder(ImmutableList.of(Row.withSchema(expectedSchema).addValues("Testval1", 1).build()));
pipeline.run();
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class JdbcIOTest method testReadRowsWithDataSourceConfiguration.
@Test
public void testReadRowsWithDataSourceConfiguration() {
PCollection<Row> rows = pipeline.apply(JdbcIO.readRows().withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION).withQuery(String.format("select name,id from %s where name = ?", READ_TABLE_NAME)).withStatementPreparator(preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
Schema expectedSchema = Schema.of(Schema.Field.of("NAME", LogicalTypes.variableLengthString(JDBCType.VARCHAR, 500)).withNullable(true), Schema.Field.of("ID", Schema.FieldType.INT32).withNullable(true));
assertEquals(expectedSchema, rows.getSchema());
PCollection<Row> output = rows.apply(Select.fieldNames("NAME", "ID"));
PAssert.that(output).containsInAnyOrder(ImmutableList.of(Row.withSchema(expectedSchema).addValues("Testval1", 1).build()));
pipeline.run();
}
Aggregations