use of water.fvec.Vec in project h2o-3 by h2oai.
the class AstUnique method apply.
/**
 * Builds a single-column frame holding the unique values of the one column in the
 * frame produced by {@code asts[1]}.
 *
 * <p>For a categorical column the result is a sequence vec with one row per entry of
 * the column's domain, carrying that same domain. For any other column type the
 * distinct values are gathered with a {@code UniqTask} and copied into a fresh vec
 * of the same type.
 *
 * @param env  execution environment handed to the argument AST
 * @param stk  stack helper used to track the evaluated argument
 * @param asts AST arguments; {@code asts[1]} must evaluate to a one-column frame
 * @return a {@code ValFrame} wrapping the single result vec
 * @throws IllegalArgumentException if the input frame does not have exactly one column
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame fr = stk.track(asts[1].exec(env)).getFrame();
  // Check the shape BEFORE reading column 0, so a frame with the wrong number of
  // columns (including none at all) is reported with the intended message instead
  // of whatever fr.vec(0) would raise on an empty frame.
  if (fr.numCols() != 1)
    throw new IllegalArgumentException("Unique applies to a single column only.");
  Vec vec0 = fr.vec(0);
  Vec v;
  if (vec0.isCategorical()) {
    // One row per domain entry; the domain itself supplies the labels.
    v = Vec.makeSeq(0, (long) vec0.domain().length, true);
    v.setDomain(vec0.domain());
    // Re-publish the vec so the domain change is stored in the DKV.
    DKV.put(v);
  } else {
    UniqTask t = new UniqTask().doAll(fr);
    int nUniq = t._uniq.size();
    final AstGroup.G[] uniq = t._uniq.keySet().toArray(new AstGroup.G[nUniq]);
    v = Vec.makeZero(nUniq, vec0.get_type());
    // Fill the zeroed vec chunk by chunk: row (start + i) receives the first
    // group value of uniq[start + i].
    new MRTask() {
      @Override
      public void map(Chunk c) {
        int start = (int) c.start();
        for (int i = 0; i < c._len; ++i) c.set(i, uniq[i + start]._gs[0]);
      }
    }.doAll(v);
  }
  return new ValFrame(new Frame(v));
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class Merge method sort.
// Radix-sort a Frame, using the given column indices as the sort keys.
// The work is delegated to Merge.merge with an empty right-hand frame.
// The original comments describe this as a fully distributed, parallel sort that is
// not in-place: the data is doubled and a sorted copy is returned.
// An empty key list returns the input frame itself, untouched.
// Throws IllegalArgumentException when any key index lies outside [0, fr.numCols()).
public static Frame sort(final Frame fr, int[] cols) {
  if (cols.length == 0) {
    // No keys: nothing to sort by, hand back the original frame
    return fr;
  }
  for (int k = 0; k < cols.length; k++) {
    final int col = cols[k];
    if (!(col >= 0 && col < fr.numCols())) {
      throw new IllegalArgumentException("Column " + col + " is out of range of " + fr.numCols());
    }
  }
  // One identity ID map per categorical key column; other key columns keep a null slot
  final int[][] idMaps = new int[cols.length][];
  int slot = 0;
  for (int col : cols) {
    final Vec keyVec = fr.vec(col);
    if (keyVec.isCategorical()) {
      final int cardinality = keyVec.domain().length;
      final int[] identity = new int[cardinality];
      for (int level = 0; level < cardinality; level++) {
        identity[level] = level;
      }
      idMaps[slot] = identity;
    }
    slot++;
  }
  final Frame emptyRight = new Frame(new Vec[0]);
  return Merge.merge(fr, emptyRight, cols, new int[0], /*allLeft*/ true, idMaps);
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class AstLs method apply.
/**
 * Lists every key in the global key snapshot as a one-column categorical frame.
 *
 * <p>Row {@code i} stores categorical level {@code i}, and the domain entry at
 * {@code i} is the string form of the i-th key visited, so each row maps to one key.
 *
 * @return a {@code ValFrame} wrapping a frame keyed {@code "h2o_ls"} with a single
 *         column named {@code "key"}
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final ArrayList<String> names = new ArrayList<>();
  final Futures pending = new Futures();
  final AppendableVec keyVec = new AppendableVec(Vec.VectorGroup.VG_LEN1.addVec(), Vec.T_CAT);
  final NewChunk chunk = new NewChunk(keyVec, 0);
  for (Key key : KeySnapshot.globalSnapshot().keys()) {
    // The level index equals the number of names collected so far
    chunk.addCategorical(names.size());
    names.add(key.toString());
  }
  keyVec.setDomain(names.toArray(new String[names.size()]));
  chunk.close(pending);
  // c0 is the finished categorical vec of key names
  final Vec c0 = keyVec.layout_and_close(pending);
  pending.blockForPending();
  final Frame listing = new Frame(Key.<Frame>make("h2o_ls"), new String[] { "key" }, new Vec[] { c0 });
  return new ValFrame(listing);
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class AstAsNumeric method apply.
/**
 * Converts every column of the frame produced by {@code asts[1]} to a numeric vec.
 *
 * <p>Columns are converted left to right with {@code Vec.toNumericVec()}. If a
 * conversion throws, {@code VecUtils.deleteVecs} is called with the result array and
 * the index of the failing column, and the exception is rethrown unchanged.
 *
 * @return a {@code ValFrame} wrapping a new frame that reuses the input column names
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  final Frame fr = stk.track(asts[1].exec(env)).getFrame();
  final Vec[] converted = new Vec[fr.numCols()];
  int col = 0;
  while (col < converted.length) {
    // The column lookup stays outside the try, so only conversion failures trigger cleanup
    final Vec source = fr.vec(col);
    try {
      converted[col] = source.toNumericVec();
    } catch (Exception e) {
      VecUtils.deleteVecs(converted, col);
      throw e;
    }
    col++;
  }
  final Frame numeric = new Frame(fr._names, converted);
  return new ValFrame(numeric);
}
use of water.fvec.Vec in project h2o-3 by h2oai.
the class BogusV3 method testAPIThrPriorities.
/**
 * Checks that REST-API work runs at a higher F/J priority than a running
 * model-builder job.
 *
 * <p>Outline: parse a small frame and make a zero vec, launch a {@code Bogus}
 * builder and wait until it reports {@code _state == 1}, set
 * {@code H2O.LOW_PRIORITY_API_WORK} to one above the builder's recorded driver
 * priority, issue a series of {@code serve(...)} calls, then release the builder
 * and wait for its job. The {@code finally} block resets the debug-tracking
 * fields, releases the builder again and removes the job, vec and frame.
 *
 * <p>NOTE(review): {@code serve(url, params, status[, method])} is defined outside
 * this block; it presumably issues the request and checks the HTTP status code -
 * confirm against its definition.
 */
@Test
public void testAPIThrPriorities() throws IOException {
Frame fr = null;
Bogus blder = null;
Job<BogusModel> job = null;
Vec vec = null;
try {
// Get some keys & frames loaded
fr = parse_test_file(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv");
vec = Vec.makeZero(100);
// Basic test plan:
// Start a "long running model-builder job". This job will start using the
// nominal model-builder strategy, then block in the driver "as if" it's
// working hard. Imagine DL slamming all cores. We record the F/J
// priority we're running on.
//
// Then we make a REST-style call thru RequestServer looking for some
// stuff; list all frames, cloud status, view a frame (rollups). During
// these actions we record F/J queue priorities - and assert this work is
// all running higher than the DL/model-build priority.
// TODO: Make a more sophisticated builder that launches a MRTask internally,
// which blocks on ALL NODES - before we begin doing rollups. Then check
// the rollups priorities ON ALL NODES, not just this one.
// Build and launch the builder
BogusModel.BogusParameters parms = new BogusModel.BogusParameters();
blder = new Bogus(parms);
job = blder.trainModel();
// Block till the builder sets _driver_priority, and is blocked on state==1
synchronized (blder) {
// Interrupts are deliberately ignored: keep waiting until the state leaves 0
while (blder._state == 0) try {
blder.wait();
} catch (InterruptedException ignore) {
}
assert blder._state == 1;
}
int driver_prior = blder._driver_priority;
Properties urlparms;
// Now that the builder is blocked at some priority, do some GUI work which
// needs to be at a higher priority. It comes in on a non-FJ thread
// (probably Nano or Jetty) but anything that hits the F/J queue needs to
// be higher
// Debug tracking must be off before this test turns it on
Assert.assertEquals(0, H2O.LOW_PRIORITY_API_WORK);
Assert.assertNull(H2O.LOW_PRIORITY_API_WORK_CLASS);
// Turn on debug tracking: the threshold is one above the builder's driver priority
H2O.LOW_PRIORITY_API_WORK = driver_prior + 1;
// Many URLs behave.
// Broken hack URLs:
serve("/", null, 301);
serve("/junk", null, 404);
serve("/HTTP404", null, 404);
// Basic: is H2O up?
serve("/3/Cloud", null, 200);
serve("/3/About", null, 200);
// What is H2O doing?
urlparms = new Properties();
urlparms.setProperty("depth", "10");
serve("/3/Profiler", urlparms, 200);
serve("/3/JStack", null, 200);
serve("/3/KillMinus3", null, 200);
serve("/3/Timeline", null, 200);
serve("/3/Jobs", null, 200);
serve("/3/WaterMeterCpuTicks/0", null, 200);
serve("/3/WaterMeterIo", null, 200);
serve("/3/Logs/download", null, 200);
serve("/3/NetworkTest", null, 200);
// Rollup stats behave
final Key rskey = vec.rollupStatsKey();
// Rollups on my zeros not computed yet
Assert.assertNull(DKV.get(rskey));
// Asking for sigma is what makes the rollups appear in the DKV (checked next)
vec.sigma();
// Rollups on my zeros are now computed
Assert.assertNotNull(DKV.get(rskey));
// Rollups already done at parse, but gets ChunkSummary
serve("/3/Frames/iris.hex", null, 200);
// Convenience; inspection of simple stuff
urlparms = new Properties();
urlparms.setProperty("src", "./smalldata/iris");
serve("/3/Typeahead/files", urlparms, 200);
urlparms = new Properties();
urlparms.setProperty("key", "iris.hex");
urlparms.setProperty("row", "0");
urlparms.setProperty("match", "foo");
serve("/3/Find", urlparms, 200);
serve("/3/Metadata/endpoints", null, 200);
serve("/3/Frames", null, 200);
serve("/3/Models", null, 200);
serve("/3/ModelMetrics", null, 200);
serve("/3/NodePersistentStorage/configured", null, 200);
// Recovery
// Intentionally disabled - don't really want to run this one, unless we're all done with testing:
//serve("/3/Shutdown", null,200); // OOPS! Don't really want to run this one, unless we're all done with testing
// delete must happen after rollups above!
serve("/3/DKV", null, 200, "DELETE");
serve("/3/LogAndEcho", null, 200, "POST");
serve("/3/InitID", null, 200);
serve("/3/GarbageCollect", null, 200, "POST");
// Turn off debug tracking
H2O.LOW_PRIORITY_API_WORK = 0;
H2O.LOW_PRIORITY_API_WORK_CLASS = null;
// Allow the builder to complete.
// reinstate the JOB in the DKV, because JOB demands it.
DKV.put(job);
synchronized (blder) {
blder._state = 2;
blder.notify();
}
// Block for builder to complete
job.get();
} finally {
// Turn off debug tracking (repeated here so a failure above cannot leave it on)
H2O.LOW_PRIORITY_API_WORK = 0;
H2O.LOW_PRIORITY_API_WORK_CLASS = null;
// Release the builder even if the test failed before the normal release above
if (blder != null)
synchronized (blder) {
blder._state = 2;
blder.notify();
}
// Clean up everything this test created
if (job != null)
job.remove();
if (vec != null)
vec.remove();
if (fr != null)
fr.delete();
}
}
Aggregations