Use of water.parser.Enum in the project h2o-2 by h2oai.
Taken from the class ParseDataset2, method parse_impl.
// --------------------------------------------------------------------------
// Top-level parser driver
/**
 * Top-level parser driver: parses the inputs named by {@code fkeys} into the
 * frame stored under {@code job.dest()}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>reserve vec ids in the vector group of the first input;</li>
 *   <li>run the {@code MultiFileParseTask} over all inputs;</li>
 *   <li>hand back reserved ids that the parse did not use;</li>
 *   <li>compute the domain of every column flagged by {@code shouldBeEnum()};</li>
 *   <li>build the result frame, fill missing chunks and check compatibility;</li>
 *   <li>renumber enum columns, compute rollup stats, log the results;</li>
 *   <li>unlock the frame, delete or unlock the inputs, and remove the job.</li>
 * </ol>
 *
 * @param job            the parse job; supplies destination key, progress key and lock owner
 * @param fkeys          keys of the inputs to parse; when empty the job is cancelled
 * @param setup          parser setup; {@code setup._ncols} must be positive
 * @param delete_on_done when true the inputs are deleted afterwards, otherwise only unlocked
 */
private static void parse_impl(ParseDataset2 job, Key[] fkeys, CustomParser.ParserSetup setup, boolean delete_on_done) {
  assert setup._ncols > 0;
  if (fkeys.length == 0) {
    job.cancel();
    return;
  }

  // Reserve vec ids for the output columns in the vector group of the first input.
  // NOTE(review): SVMLight reserves a fixed 25,000,000 ids instead of setup._ncols,
  // presumably because its column count is not known up front - confirm.
  Vec v = getVec(fkeys[0]);
  int reserveKeys = setup._pType == ParserType.SVMLight ? 25000000 : setup._ncols;
  VectorGroup vg = v.group();
  int vecIdStart = vg.reserveKeys(reserveKeys);

  MultiFileParseTask mfpt = job._mfpt = new MultiFileParseTask(vg, vecIdStart, setup, job._progress);
  mfpt.invoke(fkeys);

  AppendableVec[] avs = mfpt.vecs();

  // Hand back the ids we reserved but did not use. The comparison is made on
  // counts relative to vecIdStart, matching the two absolute bounds passed to
  // tryReturnKeys (both are offset by vecIdStart).
  if (avs.length < reserveKeys) {
    Future<?> f = vg.tryReturnKeys(vecIdStart + reserveKeys, vecIdStart + avs.length);
    if (f != null) {
      try {
        f.get();
      } catch (InterruptedException e) {
        // Do not lose the interruption request; let later blocking calls see it.
        Thread.currentThread().interrupt();
      } catch (ExecutionException ignored) {
        // Returning surplus ids is best-effort; the parse continues with the
        // ids it already holds.
      }
    }
  }

  // Collect the indices of the columns that should become enums.
  int n = 0;
  int[] ecols = new int[avs.length];
  for (int i = 0; i < avs.length; ++i) {
    if (avs[i].shouldBeEnum()) {
      ecols[n++] = i;
    }
  }
  ecols = Arrays.copyOf(ecols, n);

  // Calculate enum domains
  EnumUpdateTask eut = null;
  if (ecols.length > 0) {
    EnumFetchTask eft = new EnumFetchTask(H2O.SELF.index(), mfpt._eKey, ecols).invokeOnAllNodes();
    Enum[] enums = eft._gEnums;
    ValueString[][] ds = new ValueString[ecols.length][];
    int j = 0;
    // enums is indexed by column number, ds by position within ecols.
    for (int i : ecols) {
      ds[j] = enums[i].computeColumnDomain();
      avs[i]._domain = ValueString.toString(ds[j]);
      j++;
    }
    eut = new EnumUpdateTask(ds, eft._lEnums, mfpt._chunk2Enum, mfpt._eKey, ecols);
  }

  final Frame fr = new Frame(job.dest(), setup._columnNames != null ? setup._columnNames : genericColumnNames(avs.length), AppendableVec.closeAll(avs));
  // SVMLight is sparse format, there may be missing chunks with all 0s, fill them in
  new SVFTask(fr).invokeOnAllNodes();
  fr.checkCompatible();

  // Update enums to the globally agreed numbering
  if (eut != null) {
    Vec[] evecs = new Vec[ecols.length];
    for (int i = 0; i < evecs.length; ++i) {
      evecs[i] = fr.vecs()[ecols[i]];
    }
    eut.doAll(evecs);
  }

  // Compute rollup stats for every column and wait for them to finish.
  Futures fs = new Futures();
  for (Vec v2 : fr.vecs()) {
    v2.rollupStats(fs);
  }
  fs.blockForPending();
  logParseResults(job, fr);

  // Release the frame for overwriting
  fr.unlock(job.self());

  // Remove CSV files from H2O memory, or just release their locks
  if (delete_on_done) {
    for (Key k : fkeys) {
      Lockable.delete(k, job.self());
    }
  } else {
    for (Key k : fkeys) {
      Lockable l = UKV.get(k);
      // Guard against a missing value so cleanup cannot fail with an NPE.
      if (l != null) {
        l.unlock(job.self());
      }
    }
  }
  job.remove();
}
Aggregations