use of com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark in project kylo by Teradata.
the class AbstractJdbcDataSetProvider method createHighWaterMark.
/**
 * Builds the {@link JdbcHighWaterMark} registered under the given key.
 *
 * <p>The mark is given a {@link LongToDateTime} formatter. When the {@link KyloCatalogClient} already holds a value for the key, that value is parsed as an
 * ISO date-time in UTC and accumulated into the new mark.</p>
 *
 * @param highWaterMarkKey name of the high water mark
 * @param client           catalog client holding the current high water mark values
 * @return the new high water mark
 * @throws KyloCatalogException if parsing or accumulating the stored value raises an {@link IllegalArgumentException}
 */
@Nonnull
@VisibleForTesting
JdbcHighWaterMark createHighWaterMark(@Nonnull final String highWaterMarkKey, @Nonnull final KyloCatalogClient<T> client) {
    final JdbcHighWaterMark mark = new JdbcHighWaterMark(highWaterMarkKey, client);
    mark.setFormatter(new LongToDateTime());

    // Nothing to seed when the client has no value stored under this key
    final String stored = client.getHighWaterMarks().get(highWaterMarkKey);
    if (stored == null) {
        return mark;
    }

    // Seed the mark with the stored value, read as epoch milliseconds in UTC
    try {
        final long millis = ISODateTimeFormat.dateTimeParser().withZoneUTC().parseMillis(stored);
        mark.accumulate(millis);
    } catch (final IllegalArgumentException e) {
        throw new KyloCatalogException("Invalid value for high water mark " + highWaterMarkKey + ": " + stored, e);
    }
    return mark;
}
use of com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark in project kylo by Teradata.
the class AbstractJdbcDataSetProvider method read.
/**
 * Loads a data set through {@link JdbcRelationProvider}.
 *
 * <p>For urls starting with {@code jdbc:postgres://} that have nothing after the host, the url-encoded value of the {@code PGDBNAME} option is appended as
 * the path and written back to the {@code url} option. When both the date field and high water mark options are present, the loaded data set is filtered by
 * date-time and wrapped so that the high water mark gets updated; otherwise a warning is logged for any option that is being ignored.</p>
 *
 * @param client  catalog client
 * @param options data set options; the {@code url} option may be replaced
 * @return the loaded (and possibly filtered) data set
 */
@Nonnull
@Override
public final T read(@Nonnull final KyloCatalogClient<T> client, @Nonnull final DataSetOptions options) {
    // PostgreSQL: fill in the database name when the url does not carry one
    final String postgresPrefix = "jdbc:postgres://";
    final Option<String> databaseName = options.getOption("PGDBNAME");
    final Option<String> jdbcUrl = options.getOption("url");

    if (databaseName.isDefined() && jdbcUrl.isDefined() && jdbcUrl.get().startsWith(postgresPrefix)) {
        // Separate the query string first, then split host from path
        final String[] urlAndQuery = jdbcUrl.get().split("\\?", 2);
        final String[] hostAndPath = urlAndQuery[0].substring(postgresPrefix.length()).split("/", 2);
        final boolean pathIsEmpty = hostAndPath.length == 1 || StringUtils.equalsAny(hostAndPath[1], "", "/");

        if (pathIsEmpty) {
            final String querySuffix = (urlAndQuery.length == 2) ? "?" + urlAndQuery[1] : "";
            options.setOption("url", postgresPrefix + hostAndPath[0] + "/" + urlEncode(databaseName.get()) + querySuffix);
        }
    }

    // Load the data set with the JDBC relation provider
    final DataFrameReader reader = SparkUtil.prepareDataFrameReader(getDataFrameReader(client, options), options, null);
    reader.format(JdbcRelationProvider.class.getName());
    T result = load(reader);

    // Apply the high water mark, or warn about options that cannot be used
    final String dateField = SparkUtil.getOrElse(options.getOption(DATE_FIELD_OPTION), null);
    final String highWaterMarkKey = SparkUtil.getOrElse(options.getOption(HIGH_WATER_MARK_OPTION), null);
    final Long overlap = getOverlap(options);

    if (highWaterMarkKey != null) {
        if (dateField != null) {
            final JdbcHighWaterMark mark = createHighWaterMark(highWaterMarkKey, client);
            result = filterByDateTime(result, dateField, mark.getValue(), overlap);
            result = updateHighWaterMark(result, dateField, mark, client);
        } else {
            log.warn("Ignoring '{}' option because '{}' option was not specified", HIGH_WATER_MARK_OPTION, DATE_FIELD_OPTION);
        }
    } else if (overlap != null) {
        log.warn("Ignoring '{}' option because '{}' and '{}' options were not specified", OVERLAP_OPTION, DATE_FIELD_OPTION, HIGH_WATER_MARK_OPTION);
    }

    return result;
}
use of com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark in project kylo by Teradata.
the class AbstractJdbcDataSetProviderTest method updateHighWaterMarkWithDate.
/**
 * Checks that updating a high water mark on a date column swaps the column for a UDF-backed one.
 */
@Test
public void updateHighWaterMarkWithDate() {
    // Stub a source data set whose schema is a single nullable date column
    final StructField dateColumn = DataTypes.createStructField("mockField", DataTypes.DateType, true);
    final DataFrame source = Mockito.mock(DataFrame.class);
    final DataFrame replaced = Mockito.mock(DataFrame.class);
    Mockito.when(source.schema()).thenReturn(DataTypes.createStructType(Collections.singletonList(dateColumn)));
    Mockito.when(source.col("mockField")).thenReturn(new Column("mockField"));
    Mockito.when(source.withColumn(Mockito.eq("mockField"), Mockito.any(Column.class))).thenReturn(replaced);

    // Run the update and expect the data set returned by withColumn
    final MockJdbcDataSetProvider provider = new MockJdbcDataSetProvider();
    final KyloCatalogClient client = Mockito.mock(KyloCatalogClient.class);
    final JdbcHighWaterMark mark = new JdbcHighWaterMark("mockWaterMark", client);
    final DataFrame actual = provider.updateHighWaterMark(source, "mockField", mark, client);
    Assert.assertEquals(replaced, actual);

    // The column handed to withColumn must be built from a Scala UDF
    final ArgumentCaptor<Column> columnCaptor = ArgumentCaptor.forClass(Column.class);
    Mockito.verify(source).withColumn(Mockito.eq("mockField"), columnCaptor.capture());
    final Column captured = columnCaptor.getValue();
    Assert.assertTrue("Expected new column to be a UDF", captured.expr() instanceof ScalaUDF);
}
use of com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark in project kylo by Teradata.
the class AbstractJdbcDataSetProviderTest method createHighWaterMarkWithExisting.
/**
 * Verify creating a high water mark with an existing value.
 *
 * <p>The client is stubbed to report {@code 2018-04-29T15:05:03} for the key, so the created mark must start at the matching epoch milliseconds. Afterwards a
 * smaller value must leave the client untouched, while a larger value must be written back to the client.</p>
 */
@Test
public void createHighWaterMarkWithExisting() {
    // Mock Kylo Catalog client
    final KyloCatalogClient client = Mockito.mock(KyloCatalogClient.class);
    Mockito.when(client.getHighWaterMarks()).thenReturn(Collections.singletonMap("mockWaterMark", "2018-04-29T15:05:03"));

    // Test creating high water mark
    final MockJdbcDataSetProvider provider = new MockJdbcDataSetProvider();
    final JdbcHighWaterMark highWaterMark = provider.createHighWaterMark("mockWaterMark", client);
    Assert.assertEquals("mockWaterMark", highWaterMark.getName());
    // Long.valueOf replaces the deprecated Long(long) constructor; assertEquals compares by equals()
    Assert.assertEquals(Long.valueOf(1525014303000L), highWaterMark.getValue());

    // Forget the interactions recorded during creation before checking updates
    Mockito.reset(client);

    // Test adding a value: one below the current mark must not reach the client
    highWaterMark.accumulate(86400000L);
    Mockito.verifyZeroInteractions(client);

    // A value above the current mark must be pushed to the client
    highWaterMark.accumulate(1532528828000L);
    Mockito.verify(client).setHighWaterMarks(Collections.singletonMap("mockWaterMark", "2018-07-25T14:27:08"));
}
use of com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark in project kylo by Teradata.
the class AbstractJdbcDataSetProviderTest method createHighWaterMark.
/**
 * Checks that a high water mark created without a stored value starts out null and reports its first value to the client.
 */
@Test
public void createHighWaterMark() {
    final MockJdbcDataSetProvider provider = new MockJdbcDataSetProvider();
    final KyloCatalogClient client = Mockito.mock(KyloCatalogClient.class);

    // Create the mark; the mocked client supplies no existing value
    final JdbcHighWaterMark mark = provider.createHighWaterMark("mockWaterMark", client);
    Assert.assertEquals("mockWaterMark", mark.getName());
    Assert.assertNull("Expected initial value to be null", mark.getValue());

    // The first accumulated value must be sent to the client
    mark.accumulate(1525014303000L);
    Mockito.verify(client).setHighWaterMarks(Collections.singletonMap("mockWaterMark", "2018-04-29T15:05:03"));
}
Aggregations