Use of com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark in project kylo by Teradata.
From the class AbstractJdbcDataSetProviderTest, method updateHighWaterMarkWithTimestamp.
/**
* Verify updating a high water mark for a timestamp column.
*/
@Test
public void updateHighWaterMarkWithTimestamp() {
    // Mock data set with a single timestamp column
    final DataFrame dataSet = Mockito.mock(DataFrame.class);
    Mockito.when(dataSet.col("mockField")).thenReturn(new Column("mockField"));

    final StructField field = DataTypes.createStructField("mockField", DataTypes.TimestampType, true);
    Mockito.when(dataSet.schema()).thenReturn(DataTypes.createStructType(Collections.singletonList(field)));

    final DataFrame mapDataSet = Mockito.mock(DataFrame.class);
    Mockito.when(dataSet.withColumn(Mockito.eq("mockField"), Mockito.any(Column.class))).thenReturn(mapDataSet);

    // Test updating high water mark
    final KyloCatalogClient client = Mockito.mock(KyloCatalogClient.class);
    final JdbcHighWaterMark highWaterMark = new JdbcHighWaterMark("mockWaterMark", client);
    final MockJdbcDataSetProvider provider = new MockJdbcDataSetProvider();
    final DataFrame newDataSet = provider.updateHighWaterMark(dataSet, "mockField", highWaterMark, client);
    Assert.assertEquals(mapDataSet, newDataSet);

    // Test that the column was replaced by a UDF column
    final ArgumentCaptor<Column> newColumn = ArgumentCaptor.forClass(Column.class);
    Mockito.verify(dataSet).withColumn(Mockito.eq("mockField"), newColumn.capture());
    Assert.assertTrue("Expected new column to be a UDF", newColumn.getValue().expr() instanceof ScalaUDF);
}
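The unsupported-type branch of updateHighWaterMark (shown in the next snippet) could be exercised with a similar test. The sketch below is hypothetical and mirrors the mocking pattern above; it assumes MockJdbcDataSetProvider resolves the schema through DataFrame.schema() and that a StringType column triggers a KyloCatalogException.

@Test(expected = KyloCatalogException.class)
public void updateHighWaterMarkWithUnsupportedType() {
    // Mock data set with a StringType column, for which no Long conversion exists
    final DataFrame dataSet = Mockito.mock(DataFrame.class);
    final StructField field = DataTypes.createStructField("mockField", DataTypes.StringType, true);
    Mockito.when(dataSet.schema()).thenReturn(DataTypes.createStructType(Collections.singletonList(field)));

    // Expect a KyloCatalogException because only date and timestamp columns are supported
    final KyloCatalogClient client = Mockito.mock(KyloCatalogClient.class);
    final JdbcHighWaterMark highWaterMark = new JdbcHighWaterMark("mockWaterMark", client);
    new MockJdbcDataSetProvider().updateHighWaterMark(dataSet, "mockField", highWaterMark, client);
}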
Use of com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark in project kylo by Teradata.
From the class AbstractJdbcDataSetProvider, method updateHighWaterMark.
/**
* Scans the specified field and updates the specified high water mark.
*/
@Nonnull
@VisibleForTesting
T updateHighWaterMark(@Nonnull final T dataSet, @Nonnull final String fieldName, @Nonnull final JdbcHighWaterMark highWaterMark, @Nonnull final KyloCatalogClient<T> client) {
    // Determine function to convert column values to Long
    final DataType fieldType = schema(dataSet).apply(fieldName).dataType();
    final Function1<?, Long> toLong;

    if (fieldType == DataTypes.DateType) {
        toLong = new DateToLong();
    } else if (fieldType == DataTypes.TimestampType) {
        toLong = new TimestampToLong();
    } else {
        throw new KyloCatalogException("Unsupported column type for high water mark: " + fieldType);
    }

    // Create UDF and apply it to the field
    final String accumulableId = (highWaterMark.getName() != null) ? highWaterMark.getName() : UUID.randomUUID().toString();
    final Accumulable<JdbcHighWaterMark, Long> accumulable = accumulable(highWaterMark, accumulableId, new JdbcHighWaterMarkAccumulableParam(), client);
    final JdbcHighWaterMarkVisitor<?> visitor = new JdbcHighWaterMarkVisitor<>(accumulable, toLong);
    return map(dataSet, fieldName, visitor, fieldType);
}
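The DateToLong and TimestampToLong conversion functions referenced above are part of the Kylo code base and their exact implementation is not shown here. A minimal sketch, assuming they extend scala.runtime.AbstractFunction1 and simply map a column value to epoch milliseconds, might look like this:

import java.io.Serializable;
import java.sql.Date;
import java.sql.Timestamp;

import scala.runtime.AbstractFunction1;

// Hypothetical sketch of the timestamp conversion used by the high water mark visitor
class TimestampToLong extends AbstractFunction1<Timestamp, Long> implements Serializable {
    @Override
    public Long apply(final Timestamp value) {
        // Epoch milliseconds, or null for null column values
        return (value != null) ? value.getTime() : null;
    }
}

// Hypothetical sketch of the date conversion used by the high water mark visitor
class DateToLong extends AbstractFunction1<Date, Long> implements Serializable {
    @Override
    public Long apply(final Date value) {
        return (value != null) ? value.getTime() : null;
    }
}

The JdbcHighWaterMarkVisitor then feeds each converted value into the Accumulable, which presumably retains the largest value seen as the new high water mark.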