
Example 1 with JobCancelledException

Use of water.Job.JobCancelledException in project h2o-2 by h2oai, in the map method of the class FrameTask.
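
The exception propagates out of the distributed map call, so code that launches the task typically wraps the doAll invocation in a try/catch. A minimal caller-side sketch, assuming a FrameTask subclass MyFrameTask, a job key jobKey, and a Frame fr (all hypothetical names, not taken from the listing below):

// Illustrative only: MyFrameTask, jobKey and fr are hypothetical names.
static void runTask(water.Key jobKey, water.fvec.Frame fr) {
    try {
        // doAll runs map(...) over every chunk of fr, possibly on remote nodes
        new MyFrameTask(jobKey).doAll(fr);
    } catch (water.Job.JobCancelledException e) {
        // the job was cancelled while the task was running; stop quietly
    }
}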

/**
   * Extracts the values, applies standardization/normalization to numerics, adds appropriate offsets to categoricals,
   * and adapts response according to the CaseMode/CaseValue if set.
   */
@Override
public final void map(Chunk[] chunks, NewChunk[] outputs) {
    if (_jobKey != null && !Job.isRunning(_jobKey))
        throw new JobCancelledException();
    final int nrows = chunks[0]._len;
    final long offset = chunks[0]._start;
    chunkInit();
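    // per-row scratch buffers: numeric values, categorical column offsets, and response(s)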
    double[] nums = MemoryManager.malloc8d(_dinfo._nums);
    int[] cats = MemoryManager.malloc4(_dinfo._cats);
    double[] response = _dinfo._responses == 0 ? null : MemoryManager.malloc8d(_dinfo._responses);
    int start = 0;
    int end = nrows;
    //random generator for skipping rows
    Random skip_rng = null;
    //Example:
    // _useFraction = 0.8 -> 1 repeat with fraction = 0.8
    // _useFraction = 1.0 -> 1 repeat with fraction = 1.0
    // _useFraction = 1.1 -> 2 repeats with fraction = 0.55
    // _useFraction = 2.1 -> 3 repeats with fraction = 0.7
    // _useFraction = 3.0 -> 3 repeats with fraction = 1.0
    final int repeats = (int) Math.ceil(_useFraction);
    final float fraction = _useFraction / repeats;
    if (fraction < 1.0)
        skip_rng = water.util.Utils.getDeterRNG(new Random().nextLong());
    long[] shuf_map = null;
    if (_shuffle) {
        shuf_map = new long[end - start];
        for (int i = 0; i < shuf_map.length; ++i) shuf_map[i] = start + i;
        Utils.shuffleArray(shuf_map, new Random().nextLong());
    }
    long num_processed_rows = 0;
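    // make 'repeats' passes over the chunk, sampling 'fraction' of the rows on each pass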
    for (int rrr = 0; rrr < repeats; ++rrr) {
        OUTER: for (int rr = start; rr < end; ++rr) {
            final int r = shuf_map != null ? (int) shuf_map[rr - start] : rr;
            final long lr = r + chunks[0]._start;
            if ((_dinfo._nfolds > 0 && (lr % _dinfo._nfolds) == _dinfo._foldId) || (skip_rng != null && skip_rng.nextFloat() > fraction))
                continue;
            //count rows with missing values even if they are skipped
            ++num_processed_rows;
            // skip rows with NAs!
            for (Chunk c : chunks) if (skipMissing() && c.isNA0(r))
                continue OUTER;
            int i = 0, ncats = 0;
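            // categorical columns: translate each level into its global column offset via _catOffsets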
            for (; i < _dinfo._cats; ++i) {
                int c;
                if (chunks[i].isNA0(r)) {
                    //missing value turns into extra (last) factor
                    cats[ncats++] = (_dinfo._catOffsets[i + 1] - 1);
                } else {
                    c = (int) chunks[i].at80(r);
                    if (_dinfo._catLvls != null) {
                        // some levels are ignored?
                        c = Arrays.binarySearch(_dinfo._catLvls[i], c);
                        if (c >= 0)
                            cats[ncats++] = c + _dinfo._catOffsets[i];
                    } else if (_dinfo._useAllFactorLevels)
                        cats[ncats++] = c + _dinfo._catOffsets[i];
                    else if (c != 0)
                        cats[ncats++] = c + _dinfo._catOffsets[i] - 1;
                }
            }
            final int n = chunks.length - _dinfo._responses;
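            // numeric columns: apply the optional shift (_normSub) and scale (_normMul)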
            for (; i < n; ++i) {
                //can be NA if skipMissing() == false
                double d = chunks[i].at0(r);
                if (_dinfo._normSub != null)
                    d -= _dinfo._normSub[i - _dinfo._cats];
                if (_dinfo._normMul != null)
                    d *= _dinfo._normMul[i - _dinfo._cats];
                nums[i - _dinfo._cats] = d;
            }
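            // response column(s): apply optional normalization; rows with a NaN response are skipped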
            for (i = 0; i < _dinfo._responses; ++i) {
                response[i] = chunks[chunks.length - _dinfo._responses + i].at0(r);
                if (_dinfo._normRespSub != null)
                    response[i] -= _dinfo._normRespSub[i];
                if (_dinfo._normRespMul != null)
                    response[i] *= _dinfo._normRespMul[i];
                // skip rows without a valid response (no supervised training possible)
                if (Double.isNaN(response[i]))
                    continue OUTER;
            }
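            // deterministic per-row seed based on the global row position and the repeat pass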
            long seed = offset + rrr * (end - start) + r;
            if (outputs != null && outputs.length > 0)
                processRow(seed, nums, ncats, cats, response, outputs);
            else
                processRow(seed, nums, ncats, cats, response);
        }
    }
    chunkDone(num_processed_rows);
}
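
The repeats/fraction bookkeeping described in the comment inside map is plain arithmetic: _useFraction is spread evenly over ceil(_useFraction) passes over the chunk. A self-contained sketch (the class name is hypothetical; the values are the ones listed in the comment above):

// Reproduces the repeats/fraction mapping from the comment in map():
// useFraction is split evenly across ceil(useFraction) passes over the chunk.
public class FractionRepeats {
    public static void main(String[] args) {
        for (float useFraction : new float[]{ 0.8f, 1.0f, 1.1f, 2.1f, 3.0f }) {
            int repeats = (int) Math.ceil(useFraction);
            float fraction = useFraction / repeats;
            System.out.println(useFraction + " -> " + repeats + " repeat(s), fraction = " + fraction);
        }
    }
}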
Also used: Random (java.util.Random), Chunk (water.fvec.Chunk), NewChunk (water.fvec.NewChunk), JobCancelledException (water.Job.JobCancelledException)

Aggregations

Random (java.util.Random): 1
JobCancelledException (water.Job.JobCancelledException): 1
Chunk (water.fvec.Chunk): 1
NewChunk (water.fvec.NewChunk): 1