Use of com.thinkbiganalytics.spark.dataprofiler.ColumnStatistics in the kylo project by Teradata.
From the class ProfileStage, method apply.
/**
 * Profiles the data set carried by the given transform result and attaches the
 * resulting statistics rows to that same result.
 *
 * <p>Rows already present in the result's profile are kept; the statistics of each
 * profiled column are appended after them. If the profiler produces no model, the
 * result is returned without touching its profile.</p>
 *
 * @param result the transform result to profile; a {@code null} value is rejected by
 *               {@code Preconditions.checkNotNull}
 * @return the same {@code result} instance that was passed in
 */
@Nonnull
@Override
public TransformResult apply(@Nullable final TransformResult result) {
    Preconditions.checkNotNull(result);

    // Run the profiler over the result's data set with a freshly constructed configuration
    final StatisticsModel model = profiler.profile(result.getDataSet(), new ProfilerConfiguration());
    if (model == null) {
        // Nothing was profiled, so the existing profile (if any) stays as it is
        return result;
    }

    // Start from a copy of the existing profile rows, or from an empty list when there are none
    final List<OutputRow> rows;
    if (result.getProfile() == null) {
        rows = new ArrayList<OutputRow>(model.getColumnStatisticsMap().size());
    } else {
        rows = new ArrayList<>(result.getProfile());
    }

    // Append the statistics rows of every profiled column
    for (final ColumnStatistics stats : model.getColumnStatisticsMap().values()) {
        rows.addAll(stats.getStatistics());
    }

    result.setProfile(rows);
    return result;
}
Use of com.thinkbiganalytics.spark.dataprofiler.ColumnStatistics in the kylo project by Teradata.
From the class OutputWriter, method writeModel.
/**
 * Writes the profile statistics of a model to Hive.
 *
 * <p>Creates a new {@code OutputWriter} from the given configuration, adds the
 * statistics rows of every column in the model to it, and then writes the
 * collected rows to the output table.</p>
 *
 * @param model                 the statistics model whose column statistics are written
 * @param profilerConfiguration the configuration used to construct the writer
 * @param sqlContext            the SQL context passed through to the table write
 * @param scs                   the Spark context service passed through to the table write
 */
public static void writeModel(@Nonnull final StatisticsModel model, @Nonnull final ProfilerConfiguration profilerConfiguration, @Nonnull final SQLContext sqlContext, @Nonnull final SparkContextService scs) {
    final OutputWriter outputWriter = new OutputWriter(profilerConfiguration);

    // Collect the rows of each column before performing the single table write
    for (final ColumnStatistics columnStats : model.getColumnStatisticsMap().values()) {
        outputWriter.addRows(columnStats.getStatistics());
    }

    outputWriter.writeResultToTable(sqlContext, scs);
}
Aggregations