Example usage of water.Job.JobCancelledException in project h2o-2 by h2oai: the map method of the FrameTask class.
/**
 * Extracts the values, applies standardization/normalization to numerics, adds appropriate offsets to categoricals,
 * and adapts response according to the CaseMode/CaseValue if set.
 *
 * Iterates over the chunk's rows (multiple passes when _useFraction > 1), optionally shuffling
 * and/or subsampling them, expands each surviving row into dense numeric, offset-encoded
 * categorical and response arrays, and hands the row to processRow().
 *
 * @param chunks  input chunks: the first _dinfo._cats columns are categorical, followed by the
 *                numeric columns, with the trailing _dinfo._responses columns being responses
 * @param outputs optional output chunks, forwarded to processRow when non-empty
 * @throws JobCancelledException if the owning job is no longer running
 */
@Override
public final void map(Chunk[] chunks, NewChunk[] outputs) {
  // Bail out early if the owning job has been cancelled.
  if (_jobKey != null && !Job.isRunning(_jobKey))
    throw new JobCancelledException();
  final int nrows = chunks[0]._len;
  final long offset = chunks[0]._start; // global row index of this chunk's first row
  chunkInit();
  final double[] nums = MemoryManager.malloc8d(_dinfo._nums);
  final int[] cats = MemoryManager.malloc4(_dinfo._cats);
  final double[] response = _dinfo._responses == 0 ? null : MemoryManager.malloc8d(_dinfo._responses);
  final int start = 0;
  final int end = nrows;
  // Random generator for row subsampling; only allocated when fraction < 1.
  Random skip_rng = null;
  // _useFraction > 1 is implemented as several full passes with a reduced per-pass fraction:
  //   _useFraction = 0.8 -> 1 repeat with fraction = 0.8
  //   _useFraction = 1.0 -> 1 repeat with fraction = 1.0
  //   _useFraction = 1.1 -> 2 repeats with fraction = 0.55
  //   _useFraction = 2.1 -> 3 repeats with fraction = 0.7
  //   _useFraction = 3.0 -> 3 repeats with fraction = 1.0
  final int repeats = (int) Math.ceil(_useFraction);
  final float fraction = _useFraction / repeats;
  if (fraction < 1.0)
    skip_rng = water.util.Utils.getDeterRNG(new Random().nextLong());
  // Optional shuffling of the row visitation order within this chunk.
  long[] shuf_map = null;
  if (_shuffle) {
    shuf_map = new long[end - start];
    for (int i = 0; i < shuf_map.length; ++i) shuf_map[i] = start + i;
    Utils.shuffleArray(shuf_map, new Random().nextLong());
  }
  long num_processed_rows = 0;
  for (int rrr = 0; rrr < repeats; ++rrr) {
    OUTER: for (int rr = start; rr < end; ++rr) {
      final int r = shuf_map != null ? (int) shuf_map[rr - start] : rr;
      // Global row number; reuse the already-captured chunk offset instead of
      // re-reading chunks[0]._start.
      final long lr = r + offset;
      // Skip rows assigned to this task's holdout fold, and rows rejected by subsampling.
      // Note: the short-circuit order matters — skip_rng must only advance when the
      // fold check did not already discard the row.
      if ((_dinfo._nfolds > 0 && (lr % _dinfo._nfolds) == _dinfo._foldId) || (skip_rng != null && skip_rng.nextFloat() > fraction))
        continue;
      // Count the row as processed even if it is skipped below for missing values.
      ++num_processed_rows;
      // Skip rows with NAs. skipMissing() is loop-invariant, so hoist it out of the
      // per-chunk scan instead of re-evaluating it for every column.
      if (skipMissing())
        for (Chunk c : chunks)
          if (c.isNA0(r))
            continue OUTER;
      int i = 0, ncats = 0;
      // Expand categorical columns into offset-encoded indices.
      for (; i < _dinfo._cats; ++i) {
        int c;
        if (chunks[i].isNA0(r)) {
          // Missing value turns into the extra (last) factor level.
          cats[ncats++] = (_dinfo._catOffsets[i + 1] - 1);
        } else {
          c = (int) chunks[i].at80(r);
          if (_dinfo._catLvls != null) {
            // Some levels are ignored? Keep only levels present in _catLvls; rows whose
            // level is absent simply contribute no entry for this column.
            c = Arrays.binarySearch(_dinfo._catLvls[i], c);
            if (c >= 0)
              cats[ncats++] = c + _dinfo._catOffsets[i];
          } else if (_dinfo._useAllFactorLevels)
            cats[ncats++] = c + _dinfo._catOffsets[i];
          else if (c != 0) // drop the first factor level when not using all levels
            cats[ncats++] = c + _dinfo._catOffsets[i] - 1;
        }
      }
      // Standardize/normalize the numeric columns.
      final int n = chunks.length - _dinfo._responses;
      for (; i < n; ++i) {
        // Can be NA if skipMissing() == false.
        double d = chunks[i].at0(r);
        if (_dinfo._normSub != null)
          d -= _dinfo._normSub[i - _dinfo._cats];
        if (_dinfo._normMul != null)
          d *= _dinfo._normMul[i - _dinfo._cats];
        nums[i - _dinfo._cats] = d;
      }
      // Extract (and optionally normalize) the response columns.
      for (i = 0; i < _dinfo._responses; ++i) {
        response[i] = chunks[chunks.length - _dinfo._responses + i].at0(r);
        if (_dinfo._normRespSub != null)
          response[i] -= _dinfo._normRespSub[i];
        if (_dinfo._normRespMul != null)
          response[i] *= _dinfo._normRespMul[i];
        // Skip rows without a valid response (no supervised training possible).
        if (Double.isNaN(response[i]))
          continue OUTER;
      }
      // Per-row seed, unique across repeats and rows of this chunk.
      long seed = offset + rrr * (end - start) + r;
      if (outputs != null && outputs.length > 0)
        processRow(seed, nums, ncats, cats, response, outputs);
      else
        processRow(seed, nums, ncats, cats, response);
    }
  }
  chunkDone(num_processed_rows);
}
Aggregations