Search in sources :

Example 16 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AUC2 method perfectAUC.

// ==========
// Given the probabilities of a 1, and the actuals (0/1) report the perfect
// AUC found by sorting the entire dataset.  Expensive, and only works for
// small data (probably caps out at about 10M rows).
//
// vprob - predicted probabilities; min/max must lie within [0,1]
// vacts - actual labels; must be integer-valued with min/max within [0,1]
//
// Throws IllegalArgumentException if either range check fails, if the two
// Vecs differ in length, or if the row count does not fit in a Java array.
// The actual AUC computation is delegated to the perfectAUC(Pair[]) overload.
public static double perfectAUC(Vec vprob, Vec vacts) {
    if (vacts.min() < 0 || vacts.max() > 1 || !vacts.isInt())
        throw new IllegalArgumentException("Actuals are either 0 or 1");
    if (vprob.min() < 0 || vprob.max() > 1)
        throw new IllegalArgumentException("Probabilities are between 0 and 1");
    final long nrows = vprob.length();
    // Every probability needs exactly one matching actual.
    if (vacts.length() != nrows)
        throw new IllegalArgumentException("Probabilities and actuals must have the same length (" + nrows + "!=" + vacts.length() + ")");
    // The rows are copied into a Java array below; a long row count cast to
    // int would silently truncate (or go negative), so reject it up front.
    if (nrows > Integer.MAX_VALUE)
        throw new IllegalArgumentException("Too many rows (" + nrows + ") to sort in memory");
    // Horrible data replication into array of structs, to sort.
    Pair[] ps = new Pair[(int) nrows];
    Vec.Reader rprob = vprob.new Reader();
    Vec.Reader racts = vacts.new Reader();
    for (int i = 0; i < ps.length; i++) {
        ps[i] = new Pair(rprob.at(i), (byte) racts.at8(i));
    }
    return perfectAUC(ps);
}
Also used : Vec(water.fvec.Vec) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Example 17 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class ConfusionMatrix method buildCM.

/**
 * Builds a confusion matrix from a column of actual labels and a column of
 * predicted labels.
 *
 * <p>The predictions are first adapted to the domain of the actuals, so the
 * resulting matrix is sized by (and labelled with) the actuals' domain. The
 * counting itself is performed by {@code CMBuilder}; how it treats missing
 * values is decided there, not in this method.
 *
 * <p>All work happens inside a {@code Scope.enter()} / {@code Scope.exit()}
 * pair, and the scope is exited even if an exception is thrown.
 *
 * @param actuals     categorical Vec of true labels
 * @param predictions categorical Vec of predicted labels
 * @return a ConfusionMatrix built from the counts and the actuals' domain
 * @throws IllegalArgumentException if either Vec is not categorical
 */
public static ConfusionMatrix buildCM(Vec actuals, Vec predictions) {
    if (!actuals.isCategorical()) {
        throw new IllegalArgumentException("actuals must be categorical.");
    }
    if (!predictions.isCategorical()) {
        throw new IllegalArgumentException("predictions must be categorical.");
    }
    Scope.enter();
    try {
        final String[] levels = actuals.domain();
        // Re-map predictions onto the actuals' levels so both columns share one encoding.
        final Vec aligned = predictions.adaptTo(levels);
        final CMBuilder counts = new CMBuilder(levels.length).doAll(actuals, aligned);
        return new ConfusionMatrix(counts._arr, levels);
    } finally {
        Scope.exit();
    }
}
Also used : Vec(water.fvec.Vec)

Example 18 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class DMatrix method transpose.

/**
 * Transposes the Frame as if it were a matrix (i.e. rows become columns).
 *
 * <p>The target frame is allocated here: it has one row per source column and
 * one zero-filled Vec per source row, laid out under a freshly created Vec
 * key. The row layout uses {@code max(1, numCols / 10000)} chunks, with any
 * remainder rows spread one-per-chunk over the leading chunks. The actual
 * copy is delegated to the two-argument {@code transpose} overload.
 *
 * @param src the frame to transpose; per the original contract it is expected
 *            to be all numeric (not checked in this method)
 * @return the result of {@code transpose(src, target)} for the newly allocated target
 * @throws RuntimeException via {@code H2O.unimpl()} when the source row count
 *         does not fit in an {@code int}
 */
public static Frame transpose(Frame src) {
    final long rowCount = src.numRows();
    if (rowCount != (int) rowCount)
        throw H2O.unimpl();
    final int colCount = src.numCols();
    final int chunkCount = Math.max(1, colCount / 10000);
    final int rowsPerChunk = colCount / chunkCount;
    final int leftover = colCount % chunkCount;
    // espc[i] is the starting row of chunk i; the final slot holds the total row count.
    final long[] espc = new long[chunkCount + 1];
    long offset = 0;
    for (int c = 0; c <= chunkCount; c++) {
        espc[c] = offset;
        // The first `leftover` chunks each take one extra row.
        offset += (c < leftover) ? rowsPerChunk + 1 : rowsPerChunk;
    }
    Key key = Vec.newKey();
    int rowLayout = Vec.ESPC.rowLayout(key, espc);
    Vec template = new Vec(key, rowLayout);
    Frame target = new Frame(template.makeZeros((int) rowCount));
    return transpose(src, target);
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec)

Example 19 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class GainsLift method init.

/**
 * Validates the label / prediction / weight columns and computes the
 * descending, de-duplicated quantile thresholds stored in {@code _quantiles}.
 *
 * <p>Steps: null-check the inputs, convert {@code _labels} to a categorical
 * Vec, validate lengths and types, align {@code _preds} (and {@code _weights})
 * to the labels' Vec group when the groups differ, then train a Quantile model
 * over the predictions to obtain the thresholds.
 *
 * @param job optional enclosing job; when non-null and not done the quantile
 *            model is trained nested inside it, otherwise as a standalone job
 * @throws IllegalArgumentException if labels or predictions are missing, the
 *         lengths differ, labels are not binary integer classes, predictions
 *         are categorical, or weights are non-numeric
 */
private void init(Job job) throws IllegalArgumentException {
    // Check for missing inputs BEFORE touching _labels: dereferencing a null
    // _labels would raise NullPointerException instead of the intended error,
    // and converting labels when _preds is null would create a Vec for nothing.
    if (_labels == null || _preds == null)
        throw new IllegalArgumentException("Missing actualLabels or predictedProbs!");
    _labels = _labels.toCategoricalVec();
    if (_labels.length() != _preds.length())
        throw new IllegalArgumentException("Both arguments must have the same length (" + _labels.length() + "!=" + _preds.length() + ")!");
    if (!_labels.isInt())
        throw new IllegalArgumentException("Actual column must be integer class labels!");
    if (_labels.cardinality() != -1 && _labels.cardinality() != 2)
        throw new IllegalArgumentException("Actual column must contain binary class labels, but found cardinality " + _labels.cardinality() + "!");
    if (_preds.isCategorical())
        throw new IllegalArgumentException("Predicted probabilities cannot be class labels, expect probabilities.");
    if (_weights != null && !_weights.isNumeric())
        throw new IllegalArgumentException("Observation weights must be numeric.");
    // The vectors are from different groups => align them, but properly delete it after computation
    // NOTE(review): weights are only aligned when labels and preds differ in group — confirm
    // that weights can never be in a different group than labels on their own.
    if (!_labels.group().equals(_preds.group())) {
        _preds = _labels.align(_preds);
        Scope.track(_preds);
        if (_weights != null) {
            _weights = _labels.align(_weights);
            Scope.track(_weights);
        }
    }
    // The single-pass path below is intentionally disabled; it is kept as a reference.
    boolean fast = false;
    if (fast) {
        // FAST VERSION: single-pass, only works with the specific pre-computed quantiles from rollupstats
        assert (_groups == 10);
        // index:                       0      1     2    3    4     5    6          7    8    9    10         11   12    13   14   15    16
        assert (Arrays.equals(Vec.PERCENTILES,
            new double[] { 0.001, 0.01, 0.1, 0.2, 0.25, 0.3, 1.0 / 3.0, 0.4, 0.5, 0.6, 2.0 / 3.0, 0.7, 0.75, 0.8, 0.9, 0.99, 0.999 }));
        //HACK: hardcoded quantiles for simplicity (0.9,0.8,...,0.1,0)
        //might do a full pass over the Vec
        double[] rq = _preds.pctiles();
        _quantiles = new double[] { rq[14], rq[13], rq[11], rq[9], rq[8], rq[7], rq[5], rq[3], rq[2], 0 };
    } else {
        // ACCURATE VERSION: multi-pass
        Frame fr = null;
        QuantileModel qm = null;
        try {
            QuantileModel.QuantileParameters qp = new QuantileModel.QuantileParameters();
            // Temporary training frame holding the predictions (and weights, if any).
            if (_weights == null) {
                fr = new Frame(Key.<Frame>make(), new String[] { "predictions" }, new Vec[] { _preds });
            } else {
                fr = new Frame(Key.<Frame>make(), new String[] { "predictions", "weights" }, new Vec[] { _preds, _weights });
                qp._weights_column = "weights";
            }
            DKV.put(fr);
            qp._train = fr._key;
            if (_groups > 0) {
                qp._probs = new double[_groups];
                for (int i = 0; i < _groups; ++i) {
                    // This is 0.9, 0.8, 0.7, 0.6, ..., 0.1, 0 for 10 groups
                    qp._probs[i] = (_groups - i - 1.) / _groups;
                }
            } else {
                // No group count requested: fall back to a fixed probability grid.
                qp._probs = new double[] { 0.99, 0.98, 0.97, 0.96, 0.95, 0.9, 0.85, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0 };
            }
            qm = job != null && !job.isDone() ? new Quantile(qp, job).trainModelNested(null) : new Quantile(qp).trainModel().get();
            _quantiles = qm._output._quantiles[0];
            // Drop duplicate thresholds and store the remainder in descending order.
            TreeSet<Double> hs = new TreeSet<>();
            for (double d : _quantiles) {
                hs.add(d);
            }
            _quantiles = new double[hs.size()];
            Iterator<Double> it = hs.descendingIterator();
            int i = 0;
            while (it.hasNext()) {
                _quantiles[i++] = it.next();
            }
        } finally {
            // Always clean up the temporary model and the temporary frame key.
            if (qm != null)
                qm.remove();
            if (fr != null)
                DKV.remove(fr._key);
        }
    }
}
Also used : Frame(water.fvec.Frame) PrettyPrint(water.util.PrettyPrint) QuantileModel(hex.quantile.QuantileModel) Vec(water.fvec.Vec) TreeSet(java.util.TreeSet) Quantile(hex.quantile.Quantile)

Example 20 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class GBMGridTest method testDuplicatesCarsGrid.

//@Ignore("PUBDEV-1643")
/**
 * Runs a GBM grid search whose hyper-parameter space contains only duplicate
 * values and checks that every returned model carries the same key, i.e.
 * that the duplicates did not produce distinct models.
 */
@Test
public void testDuplicatesCarsGrid() {
    Grid grid = null;
    Frame fr = null;
    Vec old = null;
    try {
        fr = parse_test_file("smalldata/junit/cars_20mpg.csv");
        // Remove unique id
        fr.remove("name").remove();
        old = fr.remove("economy");
        // response to last column
        fr.add("economy", old);
        DKV.put(fr);
        // Setup hyperparameter search space: every entry lists the same value twice.
        // A plain HashMap is used instead of double-brace initialization, which would
        // create an anonymous subclass holding a reference to this test instance.
        HashMap<String, Object[]> hyperParms = new HashMap<String, Object[]>();
        hyperParms.put("_distribution", new DistributionFamily[] { DistributionFamily.gaussian });
        hyperParms.put("_ntrees", new Integer[] { 5, 5 });
        hyperParms.put("_max_depth", new Integer[] { 2, 2 });
        hyperParms.put("_learn_rate", new Double[] { .1, .1 });
        // Fire off a grid search
        GBMModel.GBMParameters params = new GBMModel.GBMParameters();
        params._train = fr._key;
        params._response_column = "economy";
        Job<Grid> gs = GridSearch.startGridSearch(null, params, hyperParms);
        grid = gs.get();
        // Check that duplicate model have not been constructed
        Model[] models = grid.getModels();
        assertTrue("Number of returned models has to be > 0", models.length > 0);
        // But all of them should be the same model
        Key<Model> modelKey = models[0]._key;
        for (Model m : models) {
            // Compare keys by value rather than by reference identity.
            assertTrue("Number of constructed models has to be equal to 1", modelKey.equals(m._key));
        }
    } finally {
        if (old != null) {
            old.remove();
        }
        if (fr != null) {
            fr.remove();
        }
        if (grid != null) {
            grid.remove();
        }
    }
}
Also used : Frame(water.fvec.Frame) HashMap(java.util.HashMap) Grid(hex.grid.Grid) Vec(water.fvec.Vec) Model(hex.Model) Test(org.junit.Test)

Aggregations

Vec (water.fvec.Vec): 280, Frame (water.fvec.Frame): 213, Test (org.junit.Test): 82, NFSFileVec (water.fvec.NFSFileVec): 48, ValFrame (water.rapids.vals.ValFrame): 47, Chunk (water.fvec.Chunk): 30, Random (java.util.Random): 25, NewChunk (water.fvec.NewChunk): 23, DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters): 22, Key (water.Key): 21, MRTask (water.MRTask): 17, Val (water.rapids.Val): 14, File (java.io.File): 11, ArrayList (java.util.ArrayList): 11, Futures (water.Futures): 11, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 11, ValNum (water.rapids.vals.ValNum): 11, ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame): 10, BufferedString (water.parser.BufferedString): 10, AppendableVec (water.fvec.AppendableVec): 9