Use of org.apache.spark.sql.types.DataTypes.IntegerType in the Apache Hudi project.
Class TestChainedTransformer, method testChainedTransformation.
/**
 * Exercises a {@code ChainedTransformer} built from two transformers: one that renames
 * column "foo" to "bar", and one that casts column "bar" to an integer.
 *
 * <p>The second transformer reads column "bar", which is only present once the first
 * transformer's rename has been applied, so the assertions below pass only if the
 * second transformer sees the output of the first.
 */
@Test
public void testChainedTransformation() {
  // Input frame: a single non-nullable string column "foo" with the values "100" and "200".
  StructField[] inputFields = { createStructField("foo", StringType, false) };
  StructType inputSchema = DataTypes.createStructType(inputFields);
  List<Row> inputRows = Arrays.asList(RowFactory.create("100"), RowFactory.create("200"));
  Dataset<Row> input = spark().sqlContext().createDataFrame(inputRows, inputSchema);

  // Step 1 renames the column; step 2 casts the renamed column from string to integer.
  Transformer renameFooToBar = (jsc, sparkSession, dataset, properties) ->
      dataset.withColumnRenamed("foo", "bar");
  Transformer castBarToInt = (jsc, sparkSession, dataset, properties) ->
      dataset.withColumn("bar", dataset.col("bar").cast(IntegerType));
  List<Transformer> steps = Arrays.asList(renameFooToBar, castBarToInt);

  // No properties are supplied (null); neither lambda above reads them.
  Dataset<Row> result = new ChainedTransformer(steps).apply(jsc(), spark(), input, null);

  // Row count is unchanged and the only remaining column is the renamed one.
  assertEquals(2, result.count());
  assertArrayEquals(new String[] { "bar" }, result.columns());

  // Values must now be readable as ints, in the same order as the input rows.
  List<Row> resultRows = result.collectAsList();
  int[] expectedValues = { 100, 200 };
  for (int i = 0; i < expectedValues.length; i++) {
    assertEquals(expectedValues[i], resultRows.get(i).getInt(0));
  }
}
Aggregations