use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.
the class NaiveBayes method execImpl.
/**
 * Runs the Naive Bayes job: prepares the training frame, runs the {@code NBTask} pass over it,
 * builds the model from the task result, and publishes the model under this job's key.
 */
@Override
protected void execImpl() {
  // Taken before any data preparation; later handed to the model as its training start time.
  final long startMillis = System.currentTimeMillis();

  final Frame prepared = DataInfo.prepareFrame(
      source,
      response,
      ignored_cols,
      false,
      true, /*drop const*/
      drop_na_cols);
  final DataInfo info = new DataInfo(
      prepared, 1, false, true, DataInfo.TransformType.NONE, DataInfo.TransformType.NONE);

  final NBTask nbTask = new NBTask(this, info).doAll(info._adaptedFrame);
  final NBModel nbModel = buildModel(info, nbTask, laplace, min_std_dev);

  nbModel.start_training(startMillis);
  nbModel.stop_training();

  // Lock the destination under this job, then release the lock straight away.
  nbModel.delete_and_lock(self());
  nbModel.unlock(self());
}
use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.
the class PCA method execImpl.
/**
 * Runs the PCA job: filters unusable columns out of the selected frame, computes the Gram task
 * over the remaining columns, builds and publishes the model, then closes the job and copies
 * the job's final state into the stored model's parameters.
 *
 * @throws IllegalArgumentException if fewer than two columns remain after filtering
 */
@Override
protected void execImpl() {
  final Frame data = selectFrame(source);
  final Vec[] columns = data.vecs();

  // Drop constant columns and columns where more than 20% of the rows are NA.
  final ArrayList<Integer> dropped = new ArrayList<Integer>();
  for (int c = 0; c < columns.length; c++) {
    final Vec col = columns[c];
    final boolean constant = col.min() == col.max();
    if (constant || col.naCnt() > col.length() * 0.2) {
      dropped.add(c);
    }
  }
  if (!dropped.isEmpty()) {
    final int[] dropIdx = new int[dropped.size()];
    int next = 0;
    for (Integer idx : dropped) {
      dropIdx[next++] = idx;
    }
    data.remove(dropIdx);
  }

  if (data.numCols() < 2) {
    throw new IllegalArgumentException("Need more than one column to run PCA");
  }

  final DataInfo.TransformType transform =
      standardize ? DataInfo.TransformType.STANDARDIZE : DataInfo.TransformType.NONE;
  final DataInfo info = new DataInfo(data, 0, false, false, transform);
  final GramTask gramTask = new GramTask(self(), info, false, false).doAll(info._adaptedFrame);

  final PCAModel pcaModel = buildModel(info, gramTask);
  pcaModel.delete_and_lock(self());
  pcaModel.unlock(self());

  // Close/remove the job, then read back whatever state it ended up in.
  remove();
  final JobState state = UKV.<Job>get(self()).state;

  // Atomically record that state on the stored model, if the model still exists.
  new TAtomic<PCAModel>() {
    @Override
    public PCAModel atomic(PCAModel stored) {
      if (stored != null) {
        stored.get_params().state = state;
      }
      return stored;
    }
  }.invoke(dest());
}
use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.
the class PCAScore method execImpl.
/**
 * Scores the source frame against a PCA model and writes the first {@code num_pc} principal
 * components to {@code destination_key} as columns named {@code PC0}, {@code PC1}, ...
 */
@Override
protected void execImpl() {
  // The source data MUST contain every feature (matched by name) used to build the PCA model.
  // Extra columns present in source are automatically ignored while scoring.

  // Put an empty, locked placeholder frame at the destination before doing any work.
  new Frame(destination_key, new String[0], new Vec[0]).delete_and_lock(self());

  final Frame adapted = model.adapt(source, true)[0];
  final int featureCount = model._names.length;
  final DataInfo info = new DataInfo(
      adapted, 0, false, false,
      model.normSub, model.normMul,
      DataInfo.TransformType.STANDARDIZE, null, null);

  final PCAScoreTask scorer = new PCAScoreTask(this, info, featureCount, num_pc, model.eigVec);
  scorer.doAll(num_pc, info._adaptedFrame);

  final String[] pcNames = new String[num_pc];
  // Every entry stays null (the array default): no output column gets a domain.
  final String[][] pcDomains = new String[num_pc][];
  for (int pc = 0; pc < num_pc; pc++) {
    pcNames[pc] = "PC" + pc;
  }

  scorer.outputFrame(destination_key, pcNames, pcDomains).unlock(self());
}
use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.
the class DeepLearning method buildModel.
/**
 * Train a Deep Learning model, assumes that all members are populated.
 * <p>
 * If {@code checkpoint == null}, a new model is created via {@link #initModel()} and trained.
 * Otherwise the checkpointed model is loaded, this job's settings are validated against it
 * (and in some cases silently aligned with it), a new model is derived from the checkpoint,
 * and training continues from there.
 * <p>
 * Side effects on this job when resuming: {@code autoencoder} is overwritten with the
 * checkpoint's value, {@code ignored_cols} and {@code classification} may be switched to the
 * checkpoint's values, and {@code epochs} is increased by the checkpoint's epoch counter.
 *
 * @throws IllegalArgumentException if the checkpoint cannot be found, or if source, response
 *         or validation do not match the checkpointed model
 * @throws UnsupportedOperationException if {@code n_folds != 0} when resuming from a checkpoint
 */
private void buildModel() {
DeepLearningModel cp = null;
if (checkpoint == null) {
// Fresh start: build a randomly initialized model; there is no previous model to pass on.
cp = initModel();
cp.start_training(null);
} else {
// Resume path: load the checkpointed model from the key/value store.
final DeepLearningModel previous = UKV.get(checkpoint);
if (previous == null)
throw new IllegalArgumentException("Checkpoint not found.");
Log.info("Resuming from checkpoint.");
if (n_folds != 0) {
throw new UnsupportedOperationException("n_folds must be 0: Cross-validation is not supported during checkpoint restarts.");
} else {
// Remove existing cross-validation keys after checkpoint restart.
((ValidatedJob) previous.job()).xval_models = null;
}
// Source is mandatory; if the checkpoint recorded a source, its key bytes must match ours.
if (source == null || (previous.model_info().get_params().source != null && !Arrays.equals(source._key._kb, previous.model_info().get_params().source._key._kb))) {
throw new IllegalArgumentException("source must be the same as for the checkpointed model.");
}
// The autoencoder flag always comes from the checkpoint, overriding this job's setting.
autoencoder = previous.model_info().get_params().autoencoder;
// Unless running as an autoencoder, the response column name must match the checkpoint's.
if (!autoencoder && (response == null || !source.names()[source.find(response)].equals(previous.responseName()))) {
throw new IllegalArgumentException("response must be the same as for the checkpointed model.");
}
// Differences are checked in both directions; any mismatch falls back to the checkpoint's ignored_cols.
if (Utils.difference(ignored_cols, previous.model_info().get_params().ignored_cols).length != 0 || Utils.difference(previous.model_info().get_params().ignored_cols, ignored_cols).length != 0) {
ignored_cols = previous.model_info().get_params().ignored_cols;
Log.warn("Automatically re-using ignored_cols from the checkpointed model.");
}
// Reject when exactly one side has a validation set, or when both have one but the key bytes differ.
if ((validation == null) == (previous._validationKey != null) || (validation != null && validation._key != null && previous._validationKey != null && !Arrays.equals(validation._key._kb, previous._validationKey._kb))) {
throw new IllegalArgumentException("validation must be the same as for the checkpointed model.");
}
if (classification != previous.model_info().get_params().classification) {
// Note: the assignment inside the message expression flips this job's classification flag.
Log.warn("Automatically switching to " + ((classification = !classification) ? "classification" : "regression") + " (same as the checkpointed model).");
}
// Add new epochs to existing model: the requested epochs are in addition to those already trained.
epochs += previous.epoch_counter;
Log.info("Adding " + String.format("%.3f", previous.epoch_counter) + " epochs from the checkpointed model.");
try {
final DataInfo dataInfo = prepareDataInfo();
// Derive the new model from the checkpoint and hold a write lock while its parameters are edited.
cp = new DeepLearningModel(previous, destination_key, job_key, dataInfo);
cp.write_lock(self());
cp.start_training(previous);
assert (state == JobState.RUNNING);
// A = parameters stored in the new model (from the checkpoint); B = this job (user-requested values).
final DeepLearning A = cp.model_info().get_params();
Object B = this;
for (Field fA : A.getClass().getDeclaredFields()) {
// Only fields listed in cp_modifiable may be changed on restart.
if (Utils.contains(cp_modifiable, fA.getName())) {
// Expert options are only applied when expert_mode is enabled.
if (!expert_mode && Utils.contains(expert_options, fA.getName()))
continue;
for (Field fB : B.getClass().getDeclaredFields()) {
// Field.equals matches on declaring class, name and type, i.e. the same parameter on both objects.
if (fA.equals(fB)) {
try {
// Values are compared via toString(); a null on either side is treated as a difference.
if (fB.get(B) == null || fA.get(A) == null || !fA.get(A).toString().equals(fB.get(B).toString())) {
// If both parameters are null, we don't need to do anything.
if (fA.get(A) == null && fB.get(B) == null)
continue;
Log.info("Applying user-requested modification of '" + fA.getName() + "': " + fA.get(A) + " -> " + fB.get(B));
fA.set(A, fB.get(B));
}
} catch (IllegalAccessException e) {
// NOTE(review): the failure is only printed; the parameter keeps the checkpoint's value and training proceeds.
e.printStackTrace();
}
}
}
}
}
// The copy loop above may have carried over a non-zero n_folds; force it back to 0.
if (A.n_folds != 0) {
Log.warn("Disabling cross-validation: Not supported when resuming training from a checkpoint.");
A.n_folds = 0;
}
cp.update(self());
} finally {
// cp is still null if prepareDataInfo() or the model constructor threw.
if (cp != null)
cp.unlock(self());
}
}
// Both paths converge here: train the (new or resumed) model, then mark training as stopped.
trainModel(cp);
cp.stop_training();
}
use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.
the class DeepLearning method initModel.
/**
 * Creates an initial Deep Learning model, typically handed to trainModel(model) afterwards.
 * <p>
 * Data locking, parameter checking and model construction all happen inside one
 * {@code try} block, and {@code unlock_data()} runs on every exit path.
 *
 * @return Randomly initialized model
 */
public final DeepLearningModel initModel() {
  try {
    lock_data();
    checkParams();

    final DataInfo info = prepareDataInfo();
    // Convention from DataInfo: the response is the last Vec of the adapted frame.
    final Vec responseVec = info._adaptedFrame.lastVec();

    // The relative class distribution is only computed for classification; otherwise null.
    final float[] classPriors;
    if (classification) {
      classPriors = new MRUtils.ClassDist(responseVec).doAll(responseVec).rel_dist();
    } else {
      classPriors = null;
    }

    final DeepLearningModel fresh = new DeepLearningModel(
        dest(), self(), source._key, info, (DeepLearning) this.clone(), classPriors);
    fresh.model_info().initializeMembers();
    return fresh;
  } finally {
    unlock_data();
  }
}
Aggregations