Use of org.apache.spark.sql.catalyst.expressions.ScalaUDF in the kylo project (Teradata): the readDeleteSourceFile method of the AbstractSparkDataSetProviderTest class.
/**
 * Verifies that reading a data set with {@code keepSourceFile=FALSE} returns the
 * transformed frame and replaces the value column with a UDF-backed column.
 */
@Test
@SuppressWarnings("unchecked")
public void readDeleteSourceFile() {
    isFileFormat = true;

    // Mock a source data set whose schema is a single nullable string "value" column
    dataSet = Mockito.mock(DataFrame.class);
    final StructType sourceSchema = DataTypes.createStructType(
        Collections.singletonList(DataTypes.createStructField("value", DataTypes.StringType, true)));
    Mockito.when(dataSet.schema()).thenReturn(sourceSchema);
    Mockito.when(dataSet.col("value")).thenReturn(new Column("value"));

    final DataFrame transformed = Mockito.mock(DataFrame.class);
    Mockito.when(dataSet.withColumn(Mockito.eq("value"), Mockito.any(Column.class))).thenReturn(transformed);

    // Options: text format, a source path, and source-file retention disabled
    final DataSetOptions options = new DataSetOptions();
    options.setFormat("text");
    options.setOption(KyloCatalogConstants.PATH_OPTION, "/mock/path/file.txt");
    options.setOption("keepSourceFile", "FALSE");

    // Reading through the provider must yield the frame produced by withColumn()
    final MockSparkDataSetProvider provider = new MockSparkDataSetProvider();
    final DataFrame result = provider.read(Mockito.mock(KyloCatalogClient.class), options);
    Assert.assertEquals(transformed, result);

    // The replacement for the "value" column must be backed by a Scala UDF expression
    final ArgumentCaptor<Column> columnCaptor = ArgumentCaptor.forClass(Column.class);
    Mockito.verify(dataSet).withColumn(Mockito.eq("value"), columnCaptor.capture());
    Assert.assertTrue("Expected new column to be a UDF", columnCaptor.getValue().expr() instanceof ScalaUDF);
}
Use of org.apache.spark.sql.catalyst.expressions.ScalaUDF in the kylo project (Teradata): the updateHighWaterMarkWithDate method of the AbstractJdbcDataSetProviderTest class.
/**
 * Verifies that updating a high water mark on a date column replaces the column
 * with a UDF-backed column and returns the transformed data set.
 */
@Test
public void updateHighWaterMarkWithDate() {
    // Mock a source data set containing one nullable DateType column named "mockField"
    final DataFrame source = Mockito.mock(DataFrame.class);
    final StructField dateField = DataTypes.createStructField("mockField", DataTypes.DateType, true);
    Mockito.when(source.schema()).thenReturn(DataTypes.createStructType(Collections.singletonList(dateField)));
    Mockito.when(source.col("mockField")).thenReturn(new Column("mockField"));

    final DataFrame transformed = Mockito.mock(DataFrame.class);
    Mockito.when(source.withColumn(Mockito.eq("mockField"), Mockito.any(Column.class))).thenReturn(transformed);

    // Updating the high water mark must return the frame produced by withColumn()
    final KyloCatalogClient client = Mockito.mock(KyloCatalogClient.class);
    final JdbcHighWaterMark highWaterMark = new JdbcHighWaterMark("mockWaterMark", client);
    final MockJdbcDataSetProvider provider = new MockJdbcDataSetProvider();
    final DataFrame result = provider.updateHighWaterMark(source, "mockField", highWaterMark, client);
    Assert.assertEquals(transformed, result);

    // The replacement for "mockField" must be backed by a Scala UDF expression
    final ArgumentCaptor<Column> columnCaptor = ArgumentCaptor.forClass(Column.class);
    Mockito.verify(source).withColumn(Mockito.eq("mockField"), columnCaptor.capture());
    Assert.assertTrue("Expected new column to be a UDF", columnCaptor.getValue().expr() instanceof ScalaUDF);
}
Use of org.apache.spark.sql.catalyst.expressions.ScalaUDF in the kylo project (Teradata): the updateHighWaterMarkWithTimestamp method of the AbstractJdbcDataSetProviderTest class.
/**
 * Verifies that updating a high water mark on a timestamp column replaces the
 * column with a UDF-backed column and returns the transformed data set.
 */
@Test
public void updateHighWaterMarkWithTimestamp() {
    // Mock a source data set containing one nullable TimestampType column named "mockField"
    final DataFrame source = Mockito.mock(DataFrame.class);
    final StructField tsField = DataTypes.createStructField("mockField", DataTypes.TimestampType, true);
    Mockito.when(source.schema()).thenReturn(DataTypes.createStructType(Collections.singletonList(tsField)));
    Mockito.when(source.col("mockField")).thenReturn(new Column("mockField"));

    final DataFrame transformed = Mockito.mock(DataFrame.class);
    Mockito.when(source.withColumn(Mockito.eq("mockField"), Mockito.any(Column.class))).thenReturn(transformed);

    // Updating the high water mark must return the frame produced by withColumn()
    final KyloCatalogClient client = Mockito.mock(KyloCatalogClient.class);
    final JdbcHighWaterMark highWaterMark = new JdbcHighWaterMark("mockWaterMark", client);
    final MockJdbcDataSetProvider provider = new MockJdbcDataSetProvider();
    final DataFrame result = provider.updateHighWaterMark(source, "mockField", highWaterMark, client);
    Assert.assertEquals(transformed, result);

    // The replacement for "mockField" must be backed by a Scala UDF expression
    final ArgumentCaptor<Column> columnCaptor = ArgumentCaptor.forClass(Column.class);
    Mockito.verify(source).withColumn(Mockito.eq("mockField"), columnCaptor.capture());
    Assert.assertTrue("Expected new column to be a UDF", columnCaptor.getValue().expr() instanceof ScalaUDF);
}
Aggregations