Search in sources:

Example 1 with ColumnStatistics

Use of com.thinkbiganalytics.spark.dataprofiler.ColumnStatistics in project kylo by Teradata.

From the class ProfileStage, method apply.

/**
 * Profiles the data set of the given transform result and appends the
 * statistics of every profiled column to the result's profile.
 *
 * <p>If the profiler returns no statistics model, the result is returned
 * untouched. Otherwise the existing profile rows (if any) are copied into a
 * new list, the column statistics are added after them, and the new list is
 * stored back on the result.</p>
 *
 * @param result the transform result to profile; checked with
 *               {@code Preconditions.checkNotNull} despite the
 *               {@code @Nullable} annotation
 * @return the same {@code result} instance that was passed in
 */
@Nonnull
@Override
public TransformResult apply(@Nullable final TransformResult result) {
    Preconditions.checkNotNull(result);

    final StatisticsModel model = profiler.profile(result.getDataSet(), new ProfilerConfiguration());
    if (model == null) {
        // Nothing was profiled, so the existing profile is left as it is.
        return result;
    }

    final List<OutputRow> rows;
    if (result.getProfile() == null) {
        // No previous profile: start empty, using the column count as the initial capacity.
        rows = new ArrayList<OutputRow>(model.getColumnStatisticsMap().size());
    } else {
        // Copy the previous profile so the list already held by the result is not modified.
        rows = new ArrayList<>(result.getProfile());
    }

    for (final ColumnStatistics stats : model.getColumnStatisticsMap().values()) {
        rows.addAll(stats.getStatistics());
    }

    result.setProfile(rows);
    return result;
}
Also used : ColumnStatistics(com.thinkbiganalytics.spark.dataprofiler.ColumnStatistics) StatisticsModel(com.thinkbiganalytics.spark.dataprofiler.StatisticsModel) ProfilerConfiguration(com.thinkbiganalytics.spark.dataprofiler.ProfilerConfiguration) OutputRow(com.thinkbiganalytics.spark.dataprofiler.output.OutputRow) Nonnull(javax.annotation.Nonnull)

Example 2 with ColumnStatistics

Use of com.thinkbiganalytics.spark.dataprofiler.ColumnStatistics in project kylo by Teradata.

From the class OutputWriter, method writeModel.

/**
 * Writes the profile statistics to Hive.
 *
 * <p>A new {@code OutputWriter} is created from the profiler configuration,
 * the statistics rows of every column in the model are added to it, and the
 * collected rows are then written out with {@code writeResultToTable}.</p>
 *
 * @param model                 the statistics model whose column statistics are written
 * @param profilerConfiguration the configuration used to construct the writer
 * @param sqlContext            the SQL context passed to {@code writeResultToTable}
 * @param scs                   the Spark context service passed to {@code writeResultToTable}
 */
public static void writeModel(@Nonnull final StatisticsModel model, @Nonnull final ProfilerConfiguration profilerConfiguration, @Nonnull final SQLContext sqlContext, @Nonnull final SparkContextService scs) {
    final OutputWriter outputWriter = new OutputWriter(profilerConfiguration);

    // Collect the rows of each column before writing them all out in a single call.
    for (final ColumnStatistics columnStatistics : model.getColumnStatisticsMap().values()) {
        outputWriter.addRows(columnStatistics.getStatistics());
    }

    outputWriter.writeResultToTable(sqlContext, scs);
}
Also used : ColumnStatistics(com.thinkbiganalytics.spark.dataprofiler.ColumnStatistics)

Aggregations

ColumnStatistics (com.thinkbiganalytics.spark.dataprofiler.ColumnStatistics): 2, ProfilerConfiguration (com.thinkbiganalytics.spark.dataprofiler.ProfilerConfiguration): 1, StatisticsModel (com.thinkbiganalytics.spark.dataprofiler.StatisticsModel): 1, OutputRow (com.thinkbiganalytics.spark.dataprofiler.output.OutputRow): 1, Nonnull (javax.annotation.Nonnull): 1