Search in sources :

Example 11 with OutputRow

use of com.thinkbiganalytics.spark.dataprofiler.output.OutputRow in project kylo by Teradata.

the class BigDecimalColumnStatistics method getStatistics.

/**
 * Builds the rows describing this column for the output result table.
 *
 * <p>The list is first handed to {@code writeStatisticsCommon}; the MAX, MIN and SUM rows are
 * then appended in that order. When {@code allNulls()} returns true, the {@code min},
 * {@code max} and {@code sum} fields are overwritten with {@code BigDecimal.ZERO} before they
 * are reported.
 *
 * @return the list of output rows for this column
 */
@Override
public List<OutputRow> getStatistics() {
    final List<OutputRow> result = new ArrayList<>();
    writeStatisticsCommon(result);

    if (allNulls()) {
        // Note: this resets the fields themselves, not just the values written to the rows.
        sum = BigDecimal.ZERO;
        max = BigDecimal.ZERO;
        min = BigDecimal.ZERO;
    }

    final String column = columnField.name();

    final OutputRow maxRow = new OutputRow(column, String.valueOf(MetricType.MAX), String.valueOf(max));
    result.add(maxRow);

    final OutputRow minRow = new OutputRow(column, String.valueOf(MetricType.MIN), String.valueOf(min));
    result.add(minRow);

    final OutputRow sumRow = new OutputRow(column, String.valueOf(MetricType.SUM), String.valueOf(sum));
    result.add(sumRow);

    return result;
}
Also used : ArrayList(java.util.ArrayList) OutputRow(com.thinkbiganalytics.spark.dataprofiler.output.OutputRow)

Example 12 with OutputRow

use of com.thinkbiganalytics.spark.dataprofiler.output.OutputRow in project kylo by Teradata.

the class BooleanColumnStatistics method getStatistics.

/**
 * Builds the rows describing this column for the output result table.
 *
 * <p>The list is first handed to {@code writeStatisticsCommon}; the TRUE_COUNT and FALSE_COUNT
 * rows are then appended in that order.
 *
 * @return the list of output rows for this column
 */
@Override
public List<OutputRow> getStatistics() {
    final List<OutputRow> result = new ArrayList<>();
    writeStatisticsCommon(result);

    final String column = columnField.name();

    final OutputRow trueRow =
        new OutputRow(column, String.valueOf(MetricType.TRUE_COUNT), String.valueOf(trueCount));
    result.add(trueRow);

    final OutputRow falseRow =
        new OutputRow(column, String.valueOf(MetricType.FALSE_COUNT), String.valueOf(falseCount));
    result.add(falseRow);

    return result;
}
Also used : ArrayList(java.util.ArrayList) OutputRow(com.thinkbiganalytics.spark.dataprofiler.output.OutputRow)

Example 13 with OutputRow

use of com.thinkbiganalytics.spark.dataprofiler.output.OutputRow in project kylo by Teradata.

the class ByteColumnStatistics method getStatistics.

/**
 * Builds the rows describing this column for the output result table.
 *
 * <p>The list is first handed to {@code writeStatisticsCommon}; the MAX, MIN, SUM, MEAN, STDDEV
 * and VARIANCE rows are then appended in that order. When {@code allNulls()} returns true, the
 * corresponding fields are overwritten with zero before they are reported.
 *
 * @return the list of output rows for this column
 */
@Override
public List<OutputRow> getStatistics() {
    final List<OutputRow> result = new ArrayList<>();
    writeStatisticsCommon(result);

    if (allNulls()) {
        // Note: this resets the fields themselves, not just the values written to the rows.
        variance = 0;
        stddev = 0;
        mean = 0;
        sum = 0;
        max = 0;
        min = 0;
    }

    final String column = columnField.name();

    // Metric label / rendered value pairs, listed in the order the rows are emitted.
    final String[][] metrics = {
        {String.valueOf(MetricType.MAX), String.valueOf(max)},
        {String.valueOf(MetricType.MIN), String.valueOf(min)},
        {String.valueOf(MetricType.SUM), String.valueOf(sum)},
        {String.valueOf(MetricType.MEAN), String.valueOf(mean)},
        {String.valueOf(MetricType.STDDEV), String.valueOf(stddev)},
        {String.valueOf(MetricType.VARIANCE), String.valueOf(variance)},
    };
    for (final String[] metric : metrics) {
        result.add(new OutputRow(column, metric[0], metric[1]));
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) OutputRow(com.thinkbiganalytics.spark.dataprofiler.output.OutputRow)

Example 14 with OutputRow

use of com.thinkbiganalytics.spark.dataprofiler.output.OutputRow in project kylo by Teradata.

the class FloatColumnStatistics method getStatistics.

/**
 * Builds the rows describing this column for the output result table.
 *
 * <p>The list is first handed to {@code writeStatisticsCommon}; the MAX, MIN, SUM, MEAN, STDDEV
 * and VARIANCE rows are then appended in that order. When {@code allNulls()} returns true, the
 * corresponding fields are overwritten with zero before they are reported.
 *
 * @return the list of output rows for this column
 */
@Override
public List<OutputRow> getStatistics() {
    final List<OutputRow> result = new ArrayList<>();
    writeStatisticsCommon(result);

    if (allNulls()) {
        // Note: this resets the fields themselves, not just the values written to the rows.
        variance = 0;
        stddev = 0;
        mean = 0;
        sum = 0;
        max = 0;
        min = 0;
    }

    final String column = columnField.name();

    // Metric label / rendered value pairs, listed in the order the rows are emitted.
    final String[][] metrics = {
        {String.valueOf(MetricType.MAX), String.valueOf(max)},
        {String.valueOf(MetricType.MIN), String.valueOf(min)},
        {String.valueOf(MetricType.SUM), String.valueOf(sum)},
        {String.valueOf(MetricType.MEAN), String.valueOf(mean)},
        {String.valueOf(MetricType.STDDEV), String.valueOf(stddev)},
        {String.valueOf(MetricType.VARIANCE), String.valueOf(variance)},
    };
    for (final String[] metric : metrics) {
        result.add(new OutputRow(column, metric[0], metric[1]));
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) OutputRow(com.thinkbiganalytics.spark.dataprofiler.output.OutputRow)

Example 15 with OutputRow

use of com.thinkbiganalytics.spark.dataprofiler.output.OutputRow in project kylo by Teradata.

the class LongColumnStatistics method getStatistics.

/**
 * Builds the rows describing this column for the output result table.
 *
 * <p>The list is first handed to {@code writeStatisticsCommon}; the MAX, MIN, SUM, MEAN, STDDEV
 * and VARIANCE rows are then appended in that order. When {@code allNulls()} returns true, the
 * corresponding fields are overwritten with zero before they are reported.
 *
 * @return the list of output rows for this column
 */
@Override
public List<OutputRow> getStatistics() {
    final List<OutputRow> result = new ArrayList<>();
    writeStatisticsCommon(result);

    if (allNulls()) {
        // Note: this resets the fields themselves, not just the values written to the rows.
        variance = 0;
        stddev = 0;
        mean = 0;
        sum = 0;
        max = 0;
        min = 0;
    }

    final String column = columnField.name();

    // Metric label / rendered value pairs, listed in the order the rows are emitted.
    final String[][] metrics = {
        {String.valueOf(MetricType.MAX), String.valueOf(max)},
        {String.valueOf(MetricType.MIN), String.valueOf(min)},
        {String.valueOf(MetricType.SUM), String.valueOf(sum)},
        {String.valueOf(MetricType.MEAN), String.valueOf(mean)},
        {String.valueOf(MetricType.STDDEV), String.valueOf(stddev)},
        {String.valueOf(MetricType.VARIANCE), String.valueOf(variance)},
    };
    for (final String[] metric : metrics) {
        result.add(new OutputRow(column, metric[0], metric[1]));
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) OutputRow(com.thinkbiganalytics.spark.dataprofiler.output.OutputRow)

Aggregations

OutputRow (com.thinkbiganalytics.spark.dataprofiler.output.OutputRow): 18, ArrayList (java.util.ArrayList): 13, ProfilerConfiguration (com.thinkbiganalytics.spark.dataprofiler.ProfilerConfiguration): 2, Nonnull (javax.annotation.Nonnull): 2, DataSet (com.thinkbiganalytics.spark.DataSet): 1, ColumnStatistics (com.thinkbiganalytics.spark.dataprofiler.ColumnStatistics): 1, StatisticsModel (com.thinkbiganalytics.spark.dataprofiler.StatisticsModel): 1, TimestampColumnStatistics (com.thinkbiganalytics.spark.dataprofiler.columns.TimestampColumnStatistics): 1, ProfilerTest (com.thinkbiganalytics.spark.dataprofiler.core.ProfilerTest): 1, DataValidatorResult (com.thinkbiganalytics.spark.datavalidator.DataValidatorResult): 1, List (java.util.List): 1, StructField (org.apache.spark.sql.types.StructField): 1, Test (org.junit.Test): 1