Use of water.fvec.NFSFileVec in project h2o-3 by h2oai.
Class GLMBasicTestRegression, method setup.
/**
 * One-time fixture setup for the test class.
 *
 * <p>Waits for a cloud of size 1, then parses the data files used by the tests into the
 * static frame fields ({@code _canCarTrain}, {@code _earinf}, {@code _weighted},
 * {@code _upsampled}, {@code _prostateTrain}, {@code _airlines}, {@code _airlinesMM}) and
 * stores the explicitly modified or explicitly keyed frames in the DKV.
 *
 * @throws IOException declared for the file-backed vec creation calls
 */
@BeforeClass
public static void setup() throws IOException {
  stall_till_cloudsize(1);

  // --- cancar_logIn.csv: parse, then swap "Merit" and "Class" for categorical versions ---
  File canCarFile = getFile("smalldata/glm_test/cancar_logIn.csv");
  assert canCarFile.exists();
  NFSFileVec canCarVec = NFSFileVec.make(canCarFile);
  // NOTE(review): the key is named after the prostate data although the file is
  // cancar_logIn.csv; kept verbatim because other code may look it up -- confirm.
  Key canCarKey = Key.make("prostate_cat_train.hex");
  _canCarTrain = ParseDataset.parse(canCarKey, canCarVec._key);
  // The removed (original) vecs are retained in _merit / _class.
  _merit = _canCarTrain.remove("Merit");
  _canCarTrain.add("Merit", _merit.toCategoricalVec());
  _class = _canCarTrain.remove("Class");
  _canCarTrain.add("Class", _class.toCategoricalVec());
  DKV.put(_canCarTrain._key, _canCarTrain);

  // --- earinf.txt ---
  File earinfFile = getFile("smalldata/glm_test/earinf.txt");
  NFSFileVec earinfVec = NFSFileVec.make(earinfFile);
  Key earinfKey = Key.make("earinf.hex");
  _earinf = ParseDataset.parse(earinfKey, earinfVec._key);
  DKV.put(_earinf._key, _earinf);

  // --- weighted.csv ---
  File weightedFile = getFile("smalldata/glm_test/weighted.csv");
  NFSFileVec weightedVec = NFSFileVec.make(weightedFile);
  Key weightedKey = Key.make("weighted.hex");
  _weighted = ParseDataset.parse(weightedKey, weightedVec._key);
  DKV.put(_weighted._key, _weighted);

  // --- upsampled.csv ---
  File upsampledFile = getFile("smalldata/glm_test/upsampled.csv");
  NFSFileVec upsampledVec = NFSFileVec.make(upsampledFile);
  Key upsampledKey = Key.make("upsampled.hex");
  _upsampled = ParseDataset.parse(upsampledKey, upsampledVec._key);
  DKV.put(_upsampled._key, _upsampled);

  // --- prostate training data (parsed through the test helper) ---
  _prostateTrain = parse_test_file("smalldata/glm_test/prostate_cat_train.csv");

  // --- airlines: replace "IsDepDelayed" with a copy and drop the parsed original ---
  _airlines = parse_test_file("smalldata/airlines/AirlinesTrain.csv.zip");
  Vec parsedDelay = _airlines.remove("IsDepDelayed");
  Vec delayCopy = parsedDelay.makeCopy(null);
  _airlines.add("IsDepDelayed", delayCopy);
  parsedDelay.remove();
  DKV.put(_airlines._key, _airlines);

  // --- airlines MM: same column replacement, parsed under the explicit key "AirlinesMM" ---
  _airlinesMM = parse_test_file(Key.make("AirlinesMM"), "smalldata/airlines/AirlinesTrainMM.csv.zip");
  Vec parsedDelayMM = _airlinesMM.remove("IsDepDelayed");
  _airlinesMM.add("IsDepDelayed", parsedDelayMM.makeCopy(null));
  parsedDelayMM.remove();
  DKV.put(_airlinesMM._key, _airlinesMM);
}
Use of water.fvec.NFSFileVec in project h2o-3 by h2oai.
Class KMeansTest, method testCovtype.
// "datasets directory not always available"
/**
 * Runs K-Means ten times on the covtype data set, if the file can be located.
 *
 * <p>Returns without doing anything when {@code FileUtils.locateFile} yields {@code null}.
 * Each run is started through {@code doSeed} with {@code System.nanoTime()} as its second
 * argument (presumably the seed -- confirm against {@code doSeed}), and the result of each
 * run is deleted immediately. The parsed frame is deleted in the {@code finally} block.
 */
@Test
@Ignore
public void testCovtype() {
  Frame covtype = null;
  try {
    File dataFile = FileUtils.locateFile("../datasets/UCI/UCI-large/covtype/covtype.data");
    if (dataFile == null) {
      // The large data file is absent: nothing to test.
      return;
    }

    NFSFileVec fileVec = NFSFileVec.make(dataFile);
    covtype = ParseDataset.parse(Key.make(), fileVec._key);

    KMeansModel.KMeansParameters params = new KMeansModel.KMeansParameters();
    params._train = covtype._key;
    params._k = 7;
    params._standardize = true;
    params._max_iterations = 100;
    params._init = KMeans.Initialization.Random;

    // Ten independent runs; each result is deleted before the next run starts.
    for (int run = 0; run < 10; run++) {
      doSeed(params, System.nanoTime()).delete();
    }
  } finally {
    if (covtype != null) {
      covtype.delete();
    }
  }
}
Aggregations