Use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.
Class CoxPH, method init():
@Override
protected void init() {
  super.init();
  if ((start_column != null) && !start_column.isInt())
    throw new IllegalArgumentException("start time must be null or of type integer");
  if (!stop_column.isInt())
    throw new IllegalArgumentException("stop time must be of type integer");
  if (!event_column.isInt() && !event_column.isEnum())
    throw new IllegalArgumentException("event must be of type integer or factor");
  if ((event_column.isInt() && (event_column.min() == event_column.max())) ||
      (event_column.isEnum() && (event_column.cardinality() < 2)))
    throw new IllegalArgumentException("event column contains less than two distinct values");
  if (Double.isNaN(lre_min) || lre_min <= 0)
    throw new IllegalArgumentException("lre_min must be a positive number");
  if (iter_max < 1)
    throw new IllegalArgumentException("iter_max must be a positive integer");
  final long min_time = (start_column == null) ? (long) stop_column.min() : (long) start_column.min() + 1;
  final int n_time = (int) (stop_column.max() - min_time + 1);
  if (n_time < 1)
    throw new IllegalArgumentException("start times must be strictly less than stop times");
  if (n_time > MAX_TIME_BINS)
    throw new IllegalArgumentException("number of distinct stop times is " + n_time + "; maximum number allowed is " + MAX_TIME_BINS);
  source = getSubframe();
  int n_resp = 2;
  if (weights_column != null)
    n_resp++;
  if (start_column != null)
    n_resp++;
  final DataInfo dinfo = new DataInfo(source, n_resp, false, false, DataInfo.TransformType.DEMEAN);
  model = new CoxPHModel(this, dest(), source._key, source, null);
  model.initStats(source, dinfo);
}
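A note on the constructor arguments: across the snippets on this page, DataInfo takes the adapted Frame, the number of trailing response columns, two boolean flags, and one or two TransformType values. Below is a minimal sketch of the CoxPH-style call, assuming h2o-2 on the classpath; the frame layout, and the meaning of the boolean flags (intercept and factor-level handling, inferred from the GLM2 call further down), are assumptions:

// Sketch only: `surv` is a hypothetical parsed Frame laid out as
// [predictors..., start, stop, event], matching the n_resp counting above.
private DataInfo demeanedSurvivalInfo(Frame surv) {
  final int n_resp = 3; // start, stop and event columns trail the predictors
  // Boolean flags copied verbatim from the CoxPH call above; DEMEAN centers
  // the numeric predictors (assumption: the subtracted means land in
  // _normSub, the field the GLM2 snippet below resets for offset columns).
  return new DataInfo(surv, n_resp, false, false, DataInfo.TransformType.DEMEAN);
}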
Use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.
Class CoxPH, method execImpl():
@Override
protected void execImpl() {
  final DataInfo dinfo = model.data_info;
  final int n_offsets = (model.parameters.offset_columns == null) ? 0 : model.parameters.offset_columns.length;
  final int n_coef = dinfo.fullN() - n_offsets;
  final double[] step = MemoryManager.malloc8d(n_coef);
  final double[] oldCoef = MemoryManager.malloc8d(n_coef);
  final double[] newCoef = MemoryManager.malloc8d(n_coef);
  Arrays.fill(step, Double.NaN);
  Arrays.fill(oldCoef, Double.NaN);
  for (int j = 0; j < n_coef; ++j)
    newCoef[j] = init;
  double oldLoglik = -Double.MAX_VALUE;
  final int n_time = (int) (model.max_time - model.min_time + 1);
  final boolean has_start_column = (model.parameters.start_column != null);
  final boolean has_weights_column = (model.parameters.weights_column != null);
  for (int i = 0; i <= iter_max; ++i) {
    model.iter = i;
    final CoxPHTask coxMR = new CoxPHTask(self(), dinfo, newCoef, model.min_time, n_time, n_offsets,
        has_start_column, has_weights_column).doAll(dinfo._adaptedFrame);
    final double newLoglik = model.calcLoglik(coxMR);
    if (newLoglik > oldLoglik) {
      if (i == 0)
        model.calcCounts(coxMR);
      model.calcModelStats(newCoef, newLoglik);
      model.calcCumhaz_0(coxMR);
      if (newLoglik == 0)
        model.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
      else
        model.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
      if (model.lre >= lre_min)
        break;
      Arrays.fill(step, 0);
      for (int j = 0; j < n_coef; ++j)
        for (int k = 0; k < n_coef; ++k)
          step[j] -= model.var_coef[j][k] * model.gradient[k];
      for (int j = 0; j < n_coef; ++j)
        if (Double.isNaN(step[j]) || Double.isInfinite(step[j]))
          break;
      oldLoglik = newLoglik;
      System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
    } else {
      for (int j = 0; j < n_coef; ++j)
        step[j] /= 2;
    }
    for (int j = 0; j < n_coef; ++j)
      newCoef[j] = oldCoef[j] - step[j];
  }
  final Futures fs = new Futures();
  DKV.put(dest(), model, fs);
  fs.blockForPending();
}
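The iteration above is Newton's method with step halving: while calcLoglik improves, take a fresh Newton step (the var_coef times gradient loop); on a worse likelihood, halve the previous step and re-evaluate; stop once the log relative error reaches lre_min. A self-contained sketch of just that control flow, where logLik and newtonStep are hypothetical stand-ins for the CoxPHTask/calcLoglik pass and the var_coef/gradient machinery:

// Control-flow sketch only; not the CoxPH implementation itself.
double[] fit(double[] initialCoef, int iterMax, double lreMin) {
  final int nCoef = initialCoef.length;
  double[] step = new double[nCoef];
  double[] oldCoef = new double[nCoef];
  double[] newCoef = initialCoef.clone();
  double oldLoglik = -Double.MAX_VALUE;
  for (int i = 0; i <= iterMax; ++i) {
    final double newLoglik = logLik(newCoef);
    if (newLoglik > oldLoglik) {                    // improvement: accept and step again
      final double lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
      if (lre >= lreMin) break;                     // converged to the requested digits
      step = newtonStep(newCoef);                   // -(Hessian^-1) * gradient
      oldLoglik = newLoglik;
      System.arraycopy(newCoef, 0, oldCoef, 0, nCoef);
    } else {                                        // overshot: halve the last step, retry
      for (int j = 0; j < nCoef; ++j) step[j] /= 2;
    }
    for (int j = 0; j < nCoef; ++j) newCoef[j] = oldCoef[j] - step[j];
  }
  return newCoef;
}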
Use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.
Class DeepLearning, method prepareDataInfo():
/**
 * Helper to create a DataInfo object from the source and response
 * @return DataInfo object
 */
private DataInfo prepareDataInfo() {
  final boolean del_enum_resp = classification && !response.isEnum();
  final Frame train = FrameTask.DataInfo.prepareFrame(source, autoencoder ? null : response,
      ignored_cols, classification, ignore_const_cols, true);
  final DataInfo dinfo = new FrameTask.DataInfo(train, autoencoder ? 0 : 1, true,
      autoencoder || use_all_factor_levels, //use all FactorLevels for auto-encoder
      autoencoder ? DataInfo.TransformType.NORMALIZE : DataInfo.TransformType.STANDARDIZE, //transform predictors
      classification ? DataInfo.TransformType.NONE : DataInfo.TransformType.STANDARDIZE);  //transform response
  if (!autoencoder) {
    //convention from DataInfo: response is the last Vec
    final Vec resp = dinfo._adaptedFrame.lastVec();
    //either regression or enum response
    assert (!classification ^ resp.isEnum()) : "Must have enum response for classification!";
    if (del_enum_resp)
      ltrash(resp);
  }
  return dinfo;
}
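The two TransformType arguments split preprocessing between predictors and response: autoencoders normalize their inputs (TransformType.NORMALIZE) and carry no response column, while supervised nets standardize predictors and, for regression only, standardize the response as well. Downstream code then relies on the convention noted in the method that DataInfo appends the response last. A short fragment showing how it can be read back, assuming a dinfo built by this method:

// Fragment, assuming `dinfo` came from prepareDataInfo() above.
final Frame adapted = dinfo._adaptedFrame;
final Vec resp = adapted.lastVec();               // the single (possibly transformed) response
final int nPredictorCols = adapted.numCols() - 1; // everything before it is a predictor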
Use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.
Class GramMatrixTest, method testProstate():
@Test
public void testProstate() {
  File f2 = find_test_file("smalldata/glm_test/prostate_cat_replaced.csv");
  Key ikey2 = NFSFileVec.make(f2);
  Key okey2 = Key.make("glm_model2");
  Frame fr2 = null;
  try {
    fr2 = ParseDataset2.parse(okey2, new Key[] { ikey2 });
    DataInfo dinfo = new DataInfo(fr2, 0, true, false, DataInfo.TransformType.NONE);
    GramTask gt = new GramTask(null, dinfo, true, false);
    gt.doAll(dinfo._adaptedFrame);
    double[][] res = gt._gram.getXX();
    System.out.println(Utils.pprint(gt._gram.getXX()));
    for (int i = 0; i < exp_result.length; ++i)
      for (int j = 0; j < exp_result.length; ++j)
        assertEquals(exp_result[i][j], gt._nobs * res[i][j], 1e-5);
    gt = new GramTask(null, dinfo, false, false);
    gt.doAll(dinfo._adaptedFrame);
    res = gt._gram.getXX(); // re-read the Gram of the second (no-intercept) task
    for (int i = 0; i < exp_result.length - 1; ++i)
      for (int j = 0; j < exp_result.length - 1; ++j)
        assertEquals(exp_result[i][j], gt._nobs * res[i][j], 1e-5);
  } finally {
    if (fr2 != null) // parse may have failed before fr2 was assigned
      fr2.delete();
  }
}
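What the assertions check: GramTask accumulates a row-averaged cross-product, so multiplying getXX() by _nobs should reproduce the exact X'X (the no-intercept pass compares one fewer dimension, which suggests the intercept occupies the trailing row and column). A plain-Java sketch of the averaged quantity; the averaging convention is an assumption read off the _nobs multiplication above:

// Hypothetical reference implementation of a row-averaged Gram matrix.
static double[][] gram(double[][] x) {
  final int n = x.length, p = x[0].length;
  final double[][] xx = new double[p][p];
  for (double[] row : x)
    for (int i = 0; i < p; ++i)
      for (int j = 0; j < p; ++j)
        xx[i][j] += row[i] * row[j] / n; // averaged, so n * xx[i][j] equals (X'X)[i][j]
  return xx;
}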
Use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.
Class GLM2, method init():
@Override
public void init() {
  try {
    super.init();
    if (family == Family.gamma)
      setHighAccuracy();
    if (link == Link.family_default)
      link = family.defaultLink;
    _intercept = intercept ? 1 : 0;
    // TODO
    tweedie_link_power = 1 - tweedie_variance_power;
    if (tweedie_link_power == 0)
      link = Link.log;
    _glm = new GLMParams(family, tweedie_variance_power, link, tweedie_link_power);
    source2 = new Frame(source);
    assert sorted(ignored_cols);
    source2.remove(ignored_cols);
    if (offset != null)
      // remove offset and add it later explicitly (so that it does not interfere with DataInfo.prepareFrame)
      source2.remove(source2.find(offset));
    if (nlambdas == -1)
      nlambdas = 100;
    if (lambda_search && lambda.length > 1)
      throw new IllegalArgumentException("Can not supply both lambda_search and multiple lambdas. If lambda_search is on, GLM expects only one value of lambda_value, representing the lambda_value min (smallest lambda_value in the lambda_value search).");
    // check the response
    if (response.isEnum() && family != Family.binomial)
      throw new IllegalArgumentException("Invalid response variable, trying to run regression with categorical response!");
    switch (family) {
      case poisson:
      case tweedie:
        if (response.min() < 0)
          throw new IllegalArgumentException("Illegal response column for family='" + family + "', response must be >= 0.");
        break;
      case gamma:
        if (response.min() <= 0)
          throw new IllegalArgumentException("Invalid response for family='Gamma', response must be > 0!");
        break;
      case binomial:
        if (response.min() < 0 || response.max() > 1)
          throw new IllegalArgumentException("Illegal response column for family='Binomial', response must be in <0,1> range!");
        break;
      default:
        break; // no range restriction for the remaining families
    }
    toEnum = family == Family.binomial && (!response.isEnum() && (response.min() < 0 || response.max() > 1));
    if (source2.numCols() <= 1 && !intercept)
      throw new IllegalArgumentException("There are no predictors left after ignoring constant columns in the dataset and no intercept => No parameters to estimate.");
    Frame fr = DataInfo.prepareFrame(source2, response, new int[0], toEnum, true, true);
    if (offset != null) {
      // now put the offset just in front of response
      int id = source.find(offset);
      String name = source.names()[id];
      String responseName = fr.names()[fr.numCols() - 1];
      Vec responseVec = fr.remove(fr.numCols() - 1);
      fr.add(name, offset);
      fr.add(responseName, responseVec);
      _noffsets = 1;
    }
    TransformType dt = TransformType.NONE;
    if (standardize)
      dt = intercept ? TransformType.STANDARDIZE : TransformType.DESCALE;
    _srcDinfo = new DataInfo(fr, 1, intercept, use_all_factor_levels || lambda_search, dt, DataInfo.TransformType.NONE);
    if (offset != null && dt != TransformType.NONE) {
      // do not standardize offset
      if (_srcDinfo._normMul != null)
        _srcDinfo._normMul[_srcDinfo._normMul.length - 1] = 1;
      if (_srcDinfo._normSub != null)
        _srcDinfo._normSub[_srcDinfo._normSub.length - 1] = 0;
    }
    if (!intercept && _srcDinfo._cats > 0)
      throw new IllegalArgumentException("Models with no intercept are only supported with all-numeric predictors.");
    _activeData = _srcDinfo;
    if (higher_accuracy)
      setHighAccuracy();
    if (beta_constraints != null) {
      Vec v = beta_constraints.vec("names");
      if (v == null)
        throw new IllegalArgumentException("Invalid beta constraints file, missing column with predictor names");
      // for now only enums allowed here
      String[] dom = v.domain();
      String[] names = Utils.append(_srcDinfo.coefNames(), "Intercept");
      int[] map = Utils.asInts(v);
      HashSet<Integer> s = new HashSet<Integer>();
      for (int i : map)
        if (!s.add(i))
          throw new IllegalArgumentException("Invalid beta constraints file, got duplicate constraints for '" + dom[i] + "'");
      if (!Arrays.deepEquals(dom, names)) {
        // need mapping
        HashMap<String, Integer> m = new HashMap<String, Integer>();
        for (int i = 0; i < names.length; ++i) {
          m.put(names[i], i);
        }
        int[] newMap = MemoryManager.malloc4(map.length);
        for (int i = 0; i < map.length; ++i) {
          Integer I = m.get(dom[map[i]]);
          if (I == null)
            throw new IllegalArgumentException("unknown predictor name '" + dom[map[i]] + "'");
          newMap[i] = I == null ? -1 : I;
        }
        map = newMap;
      }
      final int numoff = _srcDinfo.numStart();
      if ((v = beta_constraints.vec("lower_bounds")) != null) {
        _lbs = map == null ? Utils.asDoubles(v) : mapVec(Utils.asDoubles(v), makeAry(names.length, Double.NEGATIVE_INFINITY), map);
        // for(int i = 0; i < _lbs.length; ++i)
        //   if(_lbs[i] > 0) throw new IllegalArgumentException("lower bounds must be non-positive");
        System.out.println("lower bounds = " + Arrays.toString(_lbs));
        if (_srcDinfo._normMul != null) {
          for (int i = numoff; i < _srcDinfo.fullN(); ++i) {
            if (Double.isInfinite(_lbs[i]))
              continue;
            _lbs[i] /= _srcDinfo._normMul[i - numoff];
          }
        }
      }
      System.out.println("lbs = " + Arrays.toString(_lbs));
      if ((v = beta_constraints.vec("upper_bounds")) != null) {
        _ubs = map == null ? Utils.asDoubles(v) : mapVec(Utils.asDoubles(v), makeAry(names.length, Double.POSITIVE_INFINITY), map);
        System.out.println("upper bounds = " + Arrays.toString(_ubs));
        // if (_ubs[i] < 0) throw new IllegalArgumentException("lower bounds must be non-positive");
        if (_srcDinfo._normMul != null)
          for (int i = numoff; i < _srcDinfo.fullN(); ++i) {
            if (Double.isInfinite(_ubs[i]))
              continue;
            _ubs[i] /= _srcDinfo._normMul[i - numoff];
          }
      }
      System.out.println("ubs = " + Arrays.toString(_ubs));
      if (_lbs != null && _ubs != null) {
        for (int i = 0; i < _lbs.length; ++i)
          if (_lbs[i] > _ubs[i])
            throw new IllegalArgumentException("Invalid upper/lower bounds: lower bounds must be <= upper bounds for all variables.");
      }
      if ((v = beta_constraints.vec("beta_given")) != null) {
        _bgs = map == null ? Utils.asDoubles(v) : mapVec(Utils.asDoubles(v), makeAry(names.length, 0), map);
        if (_srcDinfo._normMul != null) {
          double norm = 0;
          for (int i = numoff; i < _srcDinfo.fullN(); ++i) {
            norm += _bgs[i] * _srcDinfo._normSub[i - numoff];
            _bgs[i] /= _srcDinfo._normMul[i - numoff];
          }
          if (_intercept == 1)
            _bgs[_bgs.length - 1] -= norm;
        }
      }
      if ((v = beta_constraints.vec("rho")) != null)
        _rho = map == null ? Utils.asDoubles(v) : mapVec(Utils.asDoubles(v), makeAry(names.length, 0), map);
      else if (_bgs != null)
        throw new IllegalArgumentException("Missing vector of penalties (rho) in beta_constraints file.");
      String[] cols = new String[] { "names", "rho", "beta_given", "lower_bounds", "upper_bounds" };
      Arrays.sort(cols);
      for (String str : beta_constraints.names())
        if (Arrays.binarySearch(cols, str) < 0)
          Log.warn("unknown column in beta_constraints file: '" + str + "'");
    }
    if (non_negative) {
      // make sure lower bounds are >= 0
      if (_lbs == null)
        _lbs = new double[_srcDinfo.fullN() + 1];
      // no bounds for intercept
      _lbs[_srcDinfo.fullN()] = Double.NEGATIVE_INFINITY;
      for (int i = 0; i < _lbs.length; ++i)
        if (_lbs[i] < 0)
          _lbs[i] = 0;
    }
  } catch (RuntimeException e) {
    e.printStackTrace();
    cleanup();
    throw e;
  }
}
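One step worth isolating: the beta_constraints frame may list predictors in any order, so the code resolves each row's name against the model's coefficient names (coefNames() plus "Intercept") before applying bounds, given betas, and rho penalties, rescaling bounds by 1/_normMul when predictors are standardized. A self-contained sketch of that name-to-index remapping, with hypothetical inputs:

// `coefNames` = model coefficient names plus "Intercept";
// `constraintNames` = the "names" column of the beta_constraints frame.
static int[] mapConstraints(String[] coefNames, String[] constraintNames) {
  final java.util.HashMap<String, Integer> index = new java.util.HashMap<String, Integer>();
  for (int i = 0; i < coefNames.length; ++i) index.put(coefNames[i], i);
  final int[] map = new int[constraintNames.length];
  for (int i = 0; i < constraintNames.length; ++i) {
    final Integer idx = index.get(constraintNames[i]);
    if (idx == null)
      throw new IllegalArgumentException("unknown predictor name '" + constraintNames[i] + "'");
    map[i] = idx;
  }
  return map;
}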