Search in sources :

Example 1 with Percentile

Use of org.apache.commons.math.stat.descriptive.rank.Percentile in project pentaho-kettle by pentaho.

From the class MemoryGroupBy, method getAggregateResult.

/**
 * Turns the accumulators held in {@code aggregate} into the final aggregate value for
 * every subject field. Used for junits in MemoryGroupByAggregationNullsTest.
 *
 * <p>When an aggregate ends up {@code null} and {@code allNullsAreZero} is set, it is
 * replaced by {@code ValueDataUtil.getZeroForValueMetaType} for that field's aggregate meta.
 *
 * @param aggregate holder of the per-field accumulators ({@code agg}) and row counts
 *                  ({@code counts}), indexed in step with {@code data.subjectnrs}
 * @return one value per entry of {@code data.subjectnrs}; an empty array when
 *         {@code data.subjectnrs} is {@code null}
 * @throws KettleValueException propagated from the value utilities called here
 *                              (presumably {@code ValueDataUtil.divide} - confirm)
 */
Object[] getAggregateResult(Aggregate aggregate) throws KettleValueException {
    // Guard BEFORE sizing the result array: the array allocation dereferences subjectnrs.
    if (data.subjectnrs == null) {
        return new Object[0];
    }
    Object[] result = new Object[data.subjectnrs.length];
    for (int i = 0; i < data.subjectnrs.length; i++) {
        Object ag = aggregate.agg[i];
        switch(meta.getAggregateType()[i]) {
            case MemoryGroupByMeta.TYPE_GROUP_SUM:
            case MemoryGroupByMeta.TYPE_GROUP_MIN:
            case MemoryGroupByMeta.TYPE_GROUP_MAX:
                // The accumulator is returned unchanged.
                break;
            case MemoryGroupByMeta.TYPE_GROUP_AVERAGE:
                // Accumulator divided by the row count, typed as an integer value.
                ag = ValueDataUtil.divide(data.aggMeta.getValueMeta(i), ag, new ValueMetaInteger("c"), aggregate.counts[i]);
                break;
            case MemoryGroupByMeta.TYPE_GROUP_MEDIAN:
            case MemoryGroupByMeta.TYPE_GROUP_PERCENTILE:
                // Median is the 50th percentile; PERCENTILE reads its rank from the value field.
                double percentile = 50.0;
                if (meta.getAggregateType()[i] == MemoryGroupByMeta.TYPE_GROUP_PERCENTILE) {
                    percentile = Double.parseDouble(meta.getValueField()[i]);
                }
                @SuppressWarnings("unchecked") List<Double> valuesList = (List<Double>) aggregate.agg[i];
                // Percentile.evaluate works on a primitive array, so unbox the collected values.
                double[] values = new double[valuesList.size()];
                for (int v = 0; v < values.length; v++) {
                    values[v] = valuesList.get(v);
                }
                ag = new Percentile().evaluate(values, percentile);
                break;
            case MemoryGroupByMeta.TYPE_GROUP_COUNT_ANY:
            case MemoryGroupByMeta.TYPE_GROUP_COUNT_ALL:
            case MemoryGroupByMeta.TYPE_GROUP_COUNT_DISTINCT:
                ag = aggregate.counts[i];
                break;
            case MemoryGroupByMeta.TYPE_GROUP_STANDARD_DEVIATION:
                // A null accumulator would throw on unboxing; leave it null so the
                // allNullsAreZero fallback below can still apply.
                if (ag != null) {
                    double sum = (Double) ag / aggregate.counts[i];
                    ag = Double.valueOf(Math.sqrt(sum));
                }
                break;
            case MemoryGroupByMeta.TYPE_GROUP_CONCAT_COMMA:
            case MemoryGroupByMeta.TYPE_GROUP_CONCAT_STRING:
                ag = ((StringBuilder) ag).toString();
                break;
            default:
                break;
        }
        if (ag == null && allNullsAreZero) {
            // PDI-11530 seems all rows for min function was nulls...
            ValueMetaInterface vm = data.aggMeta.getValueMeta(i);
            ag = ValueDataUtil.getZeroForValueMetaType(vm);
        }
        result[i] = ag;
    }
    return result;
}
Also used : Percentile(org.apache.commons.math.stat.descriptive.rank.Percentile) ValueMetaInteger(org.pentaho.di.core.row.value.ValueMetaInteger) ArrayList(java.util.ArrayList) List(java.util.List) ValueMetaInterface(org.pentaho.di.core.row.ValueMetaInterface)

Example 2 with Percentile

Use of org.apache.commons.math.stat.descriptive.rank.Percentile in project pentaho-kettle by pentaho.

From the class GroupBy, method getAggregateResult.

/**
 * Builds the final aggregate value for every subject field from the accumulators in
 * {@code data.agg} and the row counts in {@code data.counts}.
 * Used for junits in GroupByAggregationNullsTest.
 *
 * <p>When an aggregate ends up {@code null} and {@code allNullsAreZero} is set, it is
 * replaced by {@code ValueDataUtil.getZeroForValueMetaType} for that field's aggregate meta.
 *
 * @return one value per entry of {@code data.subjectnrs}; an empty array when
 *         {@code data.subjectnrs} is {@code null}
 * @throws KettleValueException propagated from the value utilities called here
 *                              (presumably {@code ValueDataUtil.divide} - confirm)
 */
Object[] getAggregateResult() throws KettleValueException {
    if (data.subjectnrs == null) {
        return new Object[0];
    }
    Object[] result = new Object[data.subjectnrs.length];
    for (int i = 0; i < data.subjectnrs.length; i++) {
        Object ag = data.agg[i];
        switch(meta.getAggregateType()[i]) {
            case GroupByMeta.TYPE_GROUP_SUM:
            case GroupByMeta.TYPE_GROUP_COUNT_DISTINCT:
            case GroupByMeta.TYPE_GROUP_MIN:
            case GroupByMeta.TYPE_GROUP_MAX:
                // The accumulator is returned unchanged.
                break;
            case GroupByMeta.TYPE_GROUP_AVERAGE:
                // Long.valueOf replaces the deprecated new Long(...) boxing constructor.
                ag = ValueDataUtil.divide(data.aggMeta.getValueMeta(i), ag, new ValueMetaInteger("c"), Long.valueOf(data.counts[i]));
                break;
            case GroupByMeta.TYPE_GROUP_MEDIAN:
            case GroupByMeta.TYPE_GROUP_PERCENTILE:
                // Median is the 50th percentile; PERCENTILE reads its rank from the value field.
                double percentile = 50.0;
                if (meta.getAggregateType()[i] == GroupByMeta.TYPE_GROUP_PERCENTILE) {
                    percentile = Double.parseDouble(meta.getValueField()[i]);
                }
                @SuppressWarnings("unchecked") List<Double> valuesList = (List<Double>) data.agg[i];
                // Percentile.evaluate works on a primitive array, so unbox the collected values.
                double[] values = new double[valuesList.size()];
                for (int v = 0; v < values.length; v++) {
                    values[v] = valuesList.get(v);
                }
                ag = new Percentile().evaluate(values, percentile);
                break;
            case GroupByMeta.TYPE_GROUP_COUNT_ANY:
            case GroupByMeta.TYPE_GROUP_COUNT_ALL:
                ag = Long.valueOf(data.counts[i]);
                break;
            case GroupByMeta.TYPE_GROUP_STANDARD_DEVIATION:
                if (ag == null) {
                    // PMD-1037 - when all input data is null ag is null, npe on access ag
                    break;
                }
                double sum = (Double) ag / data.counts[i];
                ag = Double.valueOf(Math.sqrt(sum));
                break;
            case GroupByMeta.TYPE_GROUP_CONCAT_COMMA:
            case GroupByMeta.TYPE_GROUP_CONCAT_STRING:
                ag = ((StringBuilder) ag).toString();
                break;
            default:
                break;
        }
        if (ag == null && allNullsAreZero) {
            // PDI-10250, 6960 seems all rows for min function was nulls...
            // get output subject meta based on original subject meta calculation
            ValueMetaInterface vm = data.aggMeta.getValueMeta(i);
            ag = ValueDataUtil.getZeroForValueMetaType(vm);
        }
        result[i] = ag;
    }
    return result;
}
Also used : Percentile(org.apache.commons.math.stat.descriptive.rank.Percentile) ValueMetaInterface(org.pentaho.di.core.row.ValueMetaInterface) FileObject(org.apache.commons.vfs2.FileObject) ValueMetaInteger(org.pentaho.di.core.row.value.ValueMetaInteger) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

ArrayList (java.util.ArrayList): 2, List (java.util.List): 2, Percentile (org.apache.commons.math.stat.descriptive.rank.Percentile): 2, ValueMetaInterface (org.pentaho.di.core.row.ValueMetaInterface): 2, ValueMetaInteger (org.pentaho.di.core.row.value.ValueMetaInteger): 2, FileObject (org.apache.commons.vfs2.FileObject): 1