Example usage of water.Key in project h2o-3 by h2oai: class DataInfoTestAdapt, method testInteractionTrainTestSplitAdapt.
@Test
public void testInteractionTrainTestSplitAdapt() {
    DataInfo trainInfo = null, testInfo = null;
    Frame iris = null, goldenExpanded = null;
    Frame[] irisSplits = null, goldenSplits = null;
    String[] interactions = new String[] { "class", "sepal_len" };
    boolean useAll = false;
    // golden frame is standardized before splitting, while frame we want to check would be standardized post-split (not exactly what we want!)
    boolean standardize = false;
    boolean skipMissing = true;
    try {
        iris = parse_test_file(Key.make("a.hex"), "smalldata/iris/iris_wheader.csv");
        iris.swap(3, 4);
        // build the "golden" expanded frame from the full, unsplit data
        goldenExpanded = GLMModel.GLMOutput.expand(iris, interactions, useAll, standardize, skipMissing);
        // split the raw frame and the golden frame with the same seed so rows line up
        long seed = new Random().nextLong();
        irisSplits = ShuffleSplitFrame.shuffleSplitFrame(iris, new Key[] { Key.make(), Key.make() }, new double[] { 0.8, 0.2 }, seed);
        goldenSplits = ShuffleSplitFrame.shuffleSplitFrame(goldenExpanded, new Key[] { Key.make(), Key.make() }, new double[] { 0.8, 0.2 }, seed);
        // check1: verify splits — expand irisSplits with DataInfo and compare against goldenSplits
        checkSplits(irisSplits, goldenSplits, interactions, useAll, standardize);
        // now adapt the held-out split to a DataInfo built on the training split
        trainInfo = makeInfo(irisSplits[0], interactions, useAll, standardize);
        GLMModel.GLMParameters parms = new GLMModel.GLMParameters();
        parms._response_column = "petal_wid";
        Model.adaptTestForTrain(irisSplits[1], null, null, trainInfo._adaptedFrame.names(), trainInfo._adaptedFrame.domains(), parms, true, false, interactions, null, null, false);
        testInfo = trainInfo.scoringInfo(trainInfo._adaptedFrame._names, irisSplits[1]);
        checkFrame(testInfo, goldenSplits[1]);
    } finally {
        cleanup(iris, goldenExpanded);
        cleanup(irisSplits);
        cleanup(goldenSplits);
        cleanup(trainInfo, testInfo);
    }
}
Example usage of water.Key in project h2o-3 by h2oai: class AstLevels, method apply.
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame src = stk.track(asts[1].exec(env)).getFrame();
    Futures fs = new Futures();
    Key[] keys = Vec.VectorGroup.VG_LEN1.addVecs(src.numCols());
    Vec[] levelVecs = new Vec[keys.length];
    // The output frame is as long as the widest categorical domain.
    int maxLevels = 0;
    for (int col = 0; col < src.numCols(); ++col) {
        Vec vec = src.vec(col);
        if (vec.isCategorical() && vec.domain().length > maxLevels)
            maxLevels = vec.domain().length;
    }
    final int rowLayout = Vec.ESPC.rowLayout(keys[0], new long[] { 0, maxLevels });
    for (int col = 0; col < src.numCols(); ++col) {
        AppendableVec av = new AppendableVec(keys[col], Vec.T_NUM);
        NewChunk chunk = new NewChunk(av, 0);
        String[] domain = src.vec(col).domain();
        // one level index per domain entry, then pad with NAs up to maxLevels
        int padCount = domain == null ? maxLevels : maxLevels - domain.length;
        if (domain != null)
            for (int level = 0; level < domain.length; ++level) chunk.addNum(level);
        for (int pad = 0; pad < padCount; ++pad) chunk.addNA();
        chunk.close(0, fs);
        levelVecs[col] = av.close(rowLayout, fs);
        levelVecs[col].setDomain(domain);
    }
    fs.blockForPending();
    return new ValFrame(new Frame(levelVecs));
}
Example usage of water.Key in project h2o-3 by h2oai: class AstMad, method mad.
/**
 * Median absolute deviation of the single-column frame {@code f}:
 * {@code constant * median(|x_i - median(x)|)}.
 *
 * @param f        single-column frame to compute the MAD over
 * @param cm       how quantiles combine when the median falls between two rows
 * @param constant scale factor (e.g. 1.4826 for consistency with the standard
 *                 deviation of a normal distribution)
 * @return the scaled median absolute deviation
 */
public static double mad(Frame f, QuantileModel.CombineMethod cm, double constant) {
    // need Frames everywhere because of QuantileModel demanding a Frame...
    Key tk = null;
    if (f._key == null) {
        DKV.put(tk = Key.make(), f = new Frame(tk, f.names(), f.vecs()));
    }
    final double median = AstMedian.median(f, cm);
    Frame abs_dev = new MRTask() {
        @Override
        public void map(Chunk c, NewChunk nc) {
            // atd, not at8: the column may hold real (non-integer) values, and
            // at8 throws on missing values while atd yields NaN (becomes an NA).
            for (int i = 0; i < c._len; ++i) nc.addNum(Math.abs(c.atd(i) - median));
        }
    }.doAll(1, Vec.T_NUM, f).outputFrame();
    if (abs_dev._key == null) {
        DKV.put(tk = Key.make(), abs_dev = new Frame(tk, abs_dev.names(), abs_dev.vecs()));
    }
    double mad = AstMedian.median(abs_dev, cm);
    // drop the temporary key->frame mappings, keep the backing vecs
    DKV.remove(f._key);
    DKV.remove(abs_dev._key);
    return constant * mad;
}
Example usage of water.Key in project h2o-2 by h2oai: class FillNAsWithMeanDemo02, method frame_001.
/**
 * Demo: parse iris (with NAs), compute per-column means, attach a holder
 * frame of zero-filled vecs, then fill the NAs with the column means via a
 * map/reduce task. Runs for its logged side effects on the parsed frame.
 */
@Test
public void frame_001() {
    String fileName = "./cookbookData/iris_withNA.csv";
    //String fileName = "/Users/nidhimehta/Desktop/iris_withNA.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    // parse publishes the frame under okey; fetch it back from the DKV
    ParseDataset2.parse(okey, new Key[] { fkey });
    Frame f = DKV.get(okey).get();
    Log.info("frame : " + f);
    int len = f.numCols();
    Vec[] vv = f.vecs();
    // array of means to be passed as params to the map/reduce task
    double[] arrayofMeans = new double[len];
    for (int i = 0; i < len; i++)
        arrayofMeans[i] = vv[i].mean();
    // holder frame of zero vecs, same length/group as the source columns
    Vec[] newVecs = vv[0].makeZeros(len);
    newVecs[4]._domain = vv[4]._domain;  // preserve the class column's categorical domain
    String[] newcolnames = { "1", "2", "3", "4", "5" };
    Frame output = frame(newcolnames, newVecs);
    // the holder frame added to original frame
    f.add(output, newcolnames);
    // map reduce call (result object not needed; task mutates f in place)
    new FillNasWithMean(arrayofMeans).doAll(f);
    Log.info("frame : " + f);
    //logThisH2OInstanceWebBrowserAddress();
    //sleepForever();
    Frame.delete(okey);
}
Example usage of water.Key in project h2o-3 by h2oai: class GLMBasicTestRegression, method setup.
/**
 * Parses the small test datasets used by the regression tests and publishes
 * them into the DKV before any test runs.
 */
@BeforeClass
public static void setup() throws IOException {
    stall_till_cloudsize(1);
    // cancar: Merit and Class are converted to categoricals before publishing
    _canCarTrain = parseFrame("smalldata/glm_test/cancar_logIn.csv", "prostate_cat_train.hex");
    _canCarTrain.add("Merit", (_merit = _canCarTrain.remove("Merit")).toCategoricalVec());
    _canCarTrain.add("Class", (_class = _canCarTrain.remove("Class")).toCategoricalVec());
    DKV.put(_canCarTrain._key, _canCarTrain);
    _earinf = parseFrame("smalldata/glm_test/earinf.txt", "earinf.hex");
    DKV.put(_earinf._key, _earinf);
    _weighted = parseFrame("smalldata/glm_test/weighted.csv", "weighted.hex");
    DKV.put(_weighted._key, _weighted);
    _upsampled = parseFrame("smalldata/glm_test/upsampled.csv", "upsampled.hex");
    DKV.put(_upsampled._key, _upsampled);
    _prostateTrain = parse_test_file("smalldata/glm_test/prostate_cat_train.csv");
    _airlines = parse_test_file("smalldata/airlines/AirlinesTrain.csv.zip");
    replaceResponseWithCopy(_airlines);
    // System.out.println("made copy of vec " + v._key + " -> " + v2._key + ", in DKV? src =" + ((DKV.get(v._key) != null)) + ", dst = " + (DKV.get(v2._key) != null));
    _airlinesMM = parse_test_file(Key.make("AirlinesMM"), "smalldata/airlines/AirlinesTrainMM.csv.zip");
    replaceResponseWithCopy(_airlinesMM);
}

/** Parses one smalldata file into a frame under the given key name (not yet DKV-published). */
private static Frame parseFrame(String path, String keyName) throws IOException {
    File f = getFile(path);
    assert f.exists();
    NFSFileVec nfs = NFSFileVec.make(f);
    return ParseDataset.parse(Key.make(keyName), nfs._key);
}

/** Replaces "IsDepDelayed" with a fresh copy of itself (new Vec key) and republishes the frame. */
private static void replaceResponseWithCopy(Frame fr) {
    Vec v = fr.remove("IsDepDelayed");
    fr.add("IsDepDelayed", v.makeCopy(null));
    v.remove();
    DKV.put(fr._key, fr);
}
Aggregations