Use of org.apache.spark.sql.Column in the Kylo project by Teradata.
Class DataSetProviderUtilV1, method map.
/**
 * Replaces the named field of the given data set with the result of applying {@code function} to it.
 *
 * <p>The function is wrapped in a Spark {@link UserDefinedFunction} whose single input is the
 * existing column, and the column is overwritten in place via {@code withColumn}.</p>
 */
@Nonnull
@SuppressWarnings("unchecked")
static DataFrame map(@Nonnull final DataFrame dataSet, @Nonnull final String fieldName, @Nonnull final Function1 function, @Nonnull final DataType returnType) {
    // Single-element Scala Seq holding the column the UDF will consume
    final Column sourceColumn = dataSet.col(fieldName);
    final Seq<Column> arguments = Seq$.MODULE$.<Column>newBuilder().$plus$eq(sourceColumn).result();
    // No declared input types: Spark skips input-type checking for this UDF
    final Seq<DataType> inputTypes = (Seq<DataType>) Seq$.MODULE$.<DataType>empty();
    final UserDefinedFunction udf = new UserDefinedFunction(function, returnType, inputTypes);
    return dataSet.withColumn(fieldName, udf.apply(arguments));
}
Use of org.apache.spark.sql.Column in the Kylo project by Teradata.
Class StandardDataValidator, method toSelectColumns.
/**
 * Builds the array of select columns for the given field policies.
 *
 * <p>Each policy with a non-null field contributes one column: the feed field (falling back to the
 * target field name when the feed field is empty) aliased to the target field name. A trailing
 * {@code processing_dttm} column is always appended.</p>
 */
private Column[] toSelectColumns(FieldPolicy[] policies1) {
    final List<Column> selection = new ArrayList<>();
    log.info("Building select statement for # of policies {}", policies1.length);
    for (int idx = 0; idx < policies1.length; idx++) {
        final FieldPolicy policy = policies1[idx];
        if (policy.getField() == null) {
            continue;  // policies without a target field are skipped entirely
        }
        log.info("policy [{}] name {} feedName {}", idx, policy.getField(), policy.getFeedField());
        // Source column name: prefer the feed field, fall back to the target field
        final String sourceField = StringUtils.defaultIfEmpty(policy.getFeedField(), policy.getField());
        selection.add(new Column(sourceField).as(policy.getField()));
    }
    selection.add(new Column("processing_dttm"));
    return selection.toArray(new Column[selection.size()]);
}
Use of org.apache.spark.sql.Column in the Kylo project by Teradata.
Class AbstractJdbcDataSetProviderTest, method filterByDateTimeWithOverlap.
/**
 * Verify filtering for new rows when an overlap window is applied.
 *
 * <p>The Joda clock is frozen for the duration of the test and restored in a {@code finally}
 * block so that the fixed time cannot leak into other tests running in the same JVM.</p>
 */
@Test
public void filterByDateTimeWithOverlap() {
    DateTimeUtils.setCurrentMillisFixed(1524960000000L);
    try {
        // Mock data set
        final DataFrame dataSet = Mockito.mock(DataFrame.class);
        final DataFrame filterDataSet = Mockito.mock(DataFrame.class);
        Mockito.when(dataSet.filter(Mockito.any(Column.class))).thenReturn(filterDataSet);
        // Test filtering by date time
        final MockJdbcDataSetProvider provider = new MockJdbcDataSetProvider();
        final DataFrame newDataSet = provider.filterByDateTime(dataSet, "mockField", 1524873600000L, 60000L);
        Assert.assertEquals(filterDataSet, newDataSet);
        // Test condition: bounds are (highWaterMark - overlap) and (now - overlap), as Timestamp literals
        final ArgumentCaptor<Column> conditionCaptor = ArgumentCaptor.forClass(Column.class);
        Mockito.verify(dataSet).filter(conditionCaptor.capture());
        Assert.assertEquals("((mockField > 1524873540000000) && (mockField < 1524959940000000))", conditionCaptor.getValue().toString());
    } finally {
        // Restore the system clock; without this, every later test sees the frozen time
        DateTimeUtils.setCurrentMillisSystem();
    }
}
Use of org.apache.spark.sql.Column in the Kylo project by Teradata.
Class AbstractJdbcDataSetProviderTest, method updateHighWaterMarkWithTimestamp.
/**
 * Verify updating a high water mark for a timestamp column.
 *
 * <p>The provider is expected to replace the timestamp column with a Spark UDF that records the
 * high water mark as rows flow through, and to return the data set produced by
 * {@code withColumn}.</p>
 */
@Test
public void updateHighWaterMarkWithTimestamp() {
    // Mock a data set whose schema contains a single nullable timestamp field
    final DataFrame source = Mockito.mock(DataFrame.class);
    final StructField timestampField = DataTypes.createStructField("mockField", DataTypes.TimestampType, true);
    Mockito.when(source.schema()).thenReturn(DataTypes.createStructType(Collections.singletonList(timestampField)));
    Mockito.when(source.col("mockField")).thenReturn(new Column("mockField"));
    final DataFrame replaced = Mockito.mock(DataFrame.class);
    Mockito.when(source.withColumn(Mockito.eq("mockField"), Mockito.any(Column.class))).thenReturn(replaced);
    // Exercise updateHighWaterMark and check the returned data set
    final KyloCatalogClient client = Mockito.mock(KyloCatalogClient.class);
    final JdbcHighWaterMark highWaterMark = new JdbcHighWaterMark("mockWaterMark", client);
    final MockJdbcDataSetProvider provider = new MockJdbcDataSetProvider();
    Assert.assertEquals(replaced, provider.updateHighWaterMark(source, "mockField", highWaterMark, client));
    // The column passed to withColumn must wrap a Scala UDF expression
    final ArgumentCaptor<Column> replacementColumn = ArgumentCaptor.forClass(Column.class);
    Mockito.verify(source).withColumn(Mockito.eq("mockField"), replacementColumn.capture());
    Assert.assertTrue("Expected new column to be a UDF", replacementColumn.getValue().expr() instanceof ScalaUDF);
}
Use of org.apache.spark.sql.Column in the Kylo project by Teradata.
Class AbstractJdbcDataSetProvider, method filterByDateTime.
/**
 * Filters the specified data set using the specified date field.
 *
 * <p>Rows are kept when the date field lies strictly between the previous high water mark
 * ({@code value}, exclusive) and the current time (exclusive). When {@code overlap} is given,
 * both bounds are shifted back by that many milliseconds so recently-seen rows are re-read.
 * A {@code value} at or beyond the current time is clamped to now and logged as suspicious.</p>
 */
@Nonnull
@VisibleForTesting
T filterByDateTime(@Nonnull final T dataSet, @Nonnull final String fieldName, @Nullable final Long value, @Nullable final Long overlap) {
    long endTime = DateTimeUtils.currentTimeMillis();
    long startTime = 0;
    // Parse high water mark: a future-dated mark is clamped to the current time
    if (value != null) {
        if (value >= endTime) {
            log.warn("Value for high water mark is the future: {}", value);
            startTime = endTime;
        } else {
            startTime = value;
        }
    }
    // Parse overlap: shift both bounds back, never letting the start go negative
    if (overlap != null) {
        startTime = Math.max(startTime - overlap, 0);
        endTime -= overlap;
    }
    // Build the filter; the lower bound is omitted when there is no usable start time
    final Column dateColumn = new Column(fieldName);
    final Column endFilter = dateColumn.lt(functions.lit(new Timestamp(endTime)));
    if (startTime > 0) {
        final Column startFilter = dateColumn.gt(functions.lit(new Timestamp(startTime)));
        return filter(dataSet, startFilter.and(endFilter));
    }
    return filter(dataSet, endFilter);
}
Aggregations