Use of water.Key in project h2o-3 by h2oai: class ParseSetupHandler, method guessSetup.
// Guesses a parse setup (separator, header, column names/types, and a data preview)
// for the given source frames and fills the result back into the request schema.
// NOTE(review): p serves as both request and response — it is mutated in place and returned.
public ParseSetupV3 guessSetup(int version, ParseSetupV3 p) {
if (p.source_frames == null)
throw new H2OIllegalArgumentException("No file names given for parsing.");
// Resolve and validate every source key up front so we fail fast on missing data.
Key[] fkeys = new Key[p.source_frames.length];
for (int i = 0; i < p.source_frames.length; i++) {
fkeys[i] = p.source_frames[i].key();
if (DKV.get(fkeys[i]) == null)
throw new IllegalArgumentException("Key not loaded: " + p.source_frames[i]);
}
// corrects for json putting in empty strings in the place of empty sub-arrays
if (p.na_strings != null)
for (int i = 0; i < p.na_strings.length; i++) if (p.na_strings[i] != null && p.na_strings[i].length == 0)
p.na_strings[i] = null;
ParseSetup ps;
try {
ps = ParseSetup.guessSetup(fkeys, new ParseSetup(p));
} catch (Throwable ex) {
// Unwrap DistributedException so a remote parse failure surfaces to the client
// as a clean argument error instead of an opaque distributed-execution error.
Throwable ex2 = ex;
if (ex instanceof DistributedException)
ex2 = ex.getCause();
if (ex2 instanceof ParseDataset.H2OParseException)
throw new H2OIllegalArgumentException(ex2.getMessage());
throw ex;
}
// Surface any errors collected while guessing as client-visible warnings.
if (ps._errs != null && ps._errs.length > 0) {
p.warnings = new String[ps._errs.length];
for (int i = 0; i < ps._errs.length; ++i) p.warnings[i] = ps._errs[i].toString();
}
// TODO: ParseSetup throws away the srcs list. . .
if ((null == p.column_name_filter || "".equals(p.column_name_filter)) && (0 == p.column_offset) && (0 == p.column_count)) {
// return the entire data preview
PojoUtils.copyProperties(p, ps, PojoUtils.FieldNaming.ORIGIN_HAS_UNDERSCORES, new String[] { "destination_key", "source_keys", "column_types", "parse_type" });
p.total_filtered_column_count = p.number_columns;
} else {
// have to manually copy the desired parts of p.data to apply either column_name_filter or column pagination or both
PojoUtils.copyProperties(p, ps, PojoUtils.FieldNaming.ORIGIN_HAS_UNDERSCORES, new String[] { "destination_key", "source_keys", "column_types", "data", "parse_type" });
String[] all_col_names = ps.getColumnNames();
String[][] data = ps.getData();
// keep_indexes: the column indexes (into the full preview) that survive the name filter.
ArrayList<Integer> keep_indexes = new ArrayList<>();
if (null != p.column_name_filter && !"".equals(p.column_name_filter)) {
// filter and then paginate columns
Pattern pattern = Pattern.compile(p.column_name_filter);
Matcher m = pattern.matcher("dummy");
for (int column = 0; column < all_col_names.length; column++) {
m.reset(all_col_names[column]);
if (m.matches())
keep_indexes.add(column);
}
} else {
// note: we do a little extra work below by treating this like the filter case, but the code is simpler
for (int column = 0; column < all_col_names.length; column++) {
keep_indexes.add(column);
}
}
// Paginate: clamp the window [column_offset, column_offset + column_count) to the kept columns.
int width_to_return = Math.max(0, keep_indexes.size() - p.column_offset);
if (p.column_count > 0)
width_to_return = Math.min(width_to_return, p.column_count);
String[][] filtered_data = new String[data.length][width_to_return];
for (int row = 0; row < data.length; row++) {
int output_column = 0;
for (int input_column_index = p.column_offset; input_column_index < p.column_offset + width_to_return; input_column_index++) {
// indirect through keep_indexes
filtered_data[row][output_column++] = data[row][keep_indexes.get(input_column_index)];
}
}
p.data = filtered_data;
p.total_filtered_column_count = keep_indexes.size();
}
p.destination_frame = ParseSetup.createHexName(p.source_frames[0].toString());
// If the first preview row duplicates the detected header, drop it from the preview.
if (p.check_header == ParseSetup.HAS_HEADER && p.data != null && Arrays.equals(p.column_names, p.data[0]))
p.data = Arrays.copyOfRange(p.data, 1, p.data.length);
// Fill in data type names for each column.
p.column_types = ps.getColumnTypeStrings();
p.parse_type = ps.getParseType() != null ? ps.getParseType().name() : GUESS_INFO.name();
return p;
}
Use of water.Key in project h2o-3 by h2oai: class GridSchemaV99, method fillFromImpl.
// Fills this schema from a Grid impl object: filters out deleted models, applies the
// default/validated sort order, optionally fills per-model metric arrays, and builds
// the summary and scoring-history tables.
@Override
public GridSchemaV99 fillFromImpl(Grid grid) {
Key<Model>[] gridModelKeys = grid.getModelKeys();
// Return only keys which are referencing to existing objects in DKV
// However, here is still implicit race, since we are sending
// keys to client, but referenced models can be deleted in meantime
// Hence, client has to be responsible for handling this situation
// - call getModel and check for null model
// pre-allocate
List<Key<Model>> modelKeys = new ArrayList<>(gridModelKeys.length);
for (Key k : gridModelKeys) {
if (k != null && DKV.get(k) != null) {
modelKeys.add(k);
}
}
// Default sort order -- TODO: Outsource
// Pick a sort metric from the first surviving model: logloss for classification,
// residual deviance for regression. Unsupervised models get no default sort.
if (sort_by == null && modelKeys.size() > 0 && modelKeys.get(0) != null) {
Model m = DKV.getGet(modelKeys.get(0));
if (m != null && m.isSupervised()) {
if (m._output.nclasses() > 1) {
sort_by = "logloss";
decreasing = false;
} else {
sort_by = "residual_deviance";
decreasing = false;
}
}
}
// If not, show all possible metrics
if (modelKeys.size() > 0 && sort_by != null) {
Set<String> possibleMetrics = ModelMetrics.getAllowedMetrics(modelKeys.get(0));
if (!possibleMetrics.contains(sort_by.toLowerCase())) {
throw new H2OIllegalArgumentException("Invalid argument for sort_by specified. Must be one of: " + Arrays.toString(possibleMetrics.toArray(new String[0])));
}
}
// Are we sorting by model metrics?
if (null != sort_by && !sort_by.isEmpty()) {
// sort the model keys
modelKeys = ModelMetrics.sortModelsByMetric(sort_by, decreasing, modelKeys);
// fill the metrics arrays
// NOTE(review): these arrays are only populated when a sort metric is in effect;
// without sort_by they stay null — confirm clients tolerate that.
training_metrics = new ModelMetricsBaseV3[modelKeys.size()];
validation_metrics = new ModelMetricsBaseV3[modelKeys.size()];
cross_validation_metrics = new ModelMetricsBaseV3[modelKeys.size()];
cross_validation_metrics_summary = new TwoDimTableV3[modelKeys.size()];
for (int i = 0; i < modelKeys.size(); i++) {
Model m = DKV.getGet(modelKeys.get(i));
if (m != null) {
Model.Output o = m._output;
if (null != o._training_metrics)
training_metrics[i] = (ModelMetricsBaseV3) SchemaServer.schema(3, o._training_metrics).fillFromImpl(o._training_metrics);
if (null != o._validation_metrics)
validation_metrics[i] = (ModelMetricsBaseV3) SchemaServer.schema(3, o._validation_metrics).fillFromImpl(o._validation_metrics);
if (null != o._cross_validation_metrics)
cross_validation_metrics[i] = (ModelMetricsBaseV3) SchemaServer.schema(3, o._cross_validation_metrics).fillFromImpl(o._cross_validation_metrics);
if (o._cross_validation_metrics_summary != null)
cross_validation_metrics_summary[i] = new TwoDimTableV3(o._cross_validation_metrics_summary);
}
}
}
// Convert the (possibly sorted) key list into the schema's key representation.
KeyV3.ModelKeyV3[] modelIds = new KeyV3.ModelKeyV3[modelKeys.size()];
Key<Model>[] keys = new Key[modelKeys.size()];
for (int i = 0; i < modelIds.length; i++) {
modelIds[i] = new KeyV3.ModelKeyV3(modelKeys.get(i));
keys[i] = modelIds[i].key();
}
grid_id = new KeyV3.GridKeyV3(grid._key);
model_ids = modelIds;
hyper_names = grid.getHyperNames();
failed_params = toModelParametersSchema(grid.getFailedParameters());
failure_details = grid.getFailureDetails();
failure_stack_traces = grid.getFailureStackTraces();
failed_raw_params = grid.getFailedRawParameters();
TwoDimTable t = grid.createSummaryTable(keys, sort_by, decreasing);
if (t != null)
summary_table = new TwoDimTableV3().fillFromImpl(t);
TwoDimTable h = grid.createScoringHistoryTable();
if (h != null)
scoring_history = new TwoDimTableV3().fillFromImpl(h);
return this;
}
Use of water.Key in project h2o-3 by h2oai: class ParseFolderTest, method testSameFile.
// Verifies that the same file can be parsed twice into one frame (two NFS vecs
// pointing at the same file). Cleans up the frame and BOTH file vecs afterward.
@Test
public void testSameFile() {
File f = FileUtils.locateFile("smalldata/iris/iris_wheader.csv");
NFSFileVec nfs1 = NFSFileVec.make(f);
NFSFileVec nfs2 = NFSFileVec.make(f);
Frame fr = null;
try {
fr = ParseDataset.parse(Key.make(), new Key[] { nfs1._key, nfs2._key }, false, /*delete on done*/
false, ParseSetup.GUESS_HEADER);
} finally {
if (fr != null)
fr.delete();
if (nfs1 != null)
nfs1.remove();
// Fix: nfs2 was previously never removed, leaking its key in the DKV.
if (nfs2 != null)
nfs2.remove();
}
}
Use of water.Key in project h2o-3 by h2oai: class ParseTimeTest, method testDayParseNoTime2.
// Parses date-only values (mm/dd/yyyy, no time component) and checks the resulting
// epoch-millis, adjusting the PST-based expectations to the JVM's local time zone.
@Test
public void testDayParseNoTime2() {
DateTimeZone pacific = DateTimeZone.forID("America/Los_Angeles");
DateTimeZone local = DateTimeZone.getDefault();
// Just mm/dd/yyyy, no time
String data = "Date\n" + // Note evil trailing blanks
"1/23/2014 \n" + "1/24/2014 \n" + "1/23/2014 \n" + "1/24/2014\n";
Key bytesKey = ParserTest.makeByteVec(data);
Key frameKey = Key.make("r1");
Frame parsed = ParseDataset.parse(frameKey, bytesKey);
Assert.assertTrue(parsed.vec(0).get_type_str().equals("Time"));
// Date, note: these ms counts all presume PST
long[] expected = new long[] { 1390464000000L, 1390550400000L, 1390464000000L, 1390550400000L };
// Adjust expected[] from PST to the local time zone.
for (int i = 0; i < expected.length; i++) {
expected[i] += pacific.getOffset(expected[i]) - local.getOffset(expected[i]);
}
Vec dates = parsed.vec("Date");
for (int i = 0; i < expected.length; i++) {
Assert.assertEquals(expected[i], dates.at8(i));
}
parsed.delete();
}
Use of water.Key in project h2o-3 by h2oai: class ParseTimeTest, method testTimeParseNoDate.
// Parses time-only values (no date) and checks they land as millis-since-midnight
// on the Unix epoch day, independent of any time zone (i.e. treated as GMT).
@Test
public void testTimeParseNoDate() {
// Just time, no date. Parses as msec on the Unix Epoch start date, i.e. <24*60*60*1000
// HH:mm:ss.S // tenths second
// HH:mm:ss.SSS // milliseconds
// HH:mm:ss.SSSnnnnnn // micros and nanos also
String data = "Time\n" + "0:0:0.0\n" + "0:54:13.0\n" + "10:36:2.0\n" + "10:36:8.0\n" + "10:37:49.0\n" + "11:18:48.0\n" + "11:41:34.0\n" + "11:4:49.0\n" + "12:47:41.0\n" + "3:24:19.0\n" + "3:45:55.0\n" + "3:45:56.0\n" + "3:58:24.0\n" + "6:13:55.0\n" + "6:25:14.0\n" + "7:0:15.0\n" + "7:3:8.0\n" + "8:20:8.0\n";
Key bytesKey = ParserTest.makeByteVec(data);
Key frameKey = Key.make("r1");
Frame parsed = ParseDataset.parse(frameKey, bytesKey);
Assert.assertTrue(parsed.vec(0).get_type_str().equals("Time"));
// Notice: no TZ at all ==> GMT!
long[] expectedMillis = new long[] { 0L, 3253000L, 38162000L, 38168000L, 38269000L, 40728000L, 42094000L, 39889000L, 46061000L, 12259000L, 13555000L, 13556000L, 14304000L, 22435000L, 23114000L, 25215000L, 25388000L, 30008000L };
Vec times = parsed.vec("Time");
for (int i = 0; i < expectedMillis.length; i++) {
Assert.assertEquals(expectedMillis[i], times.at8(i));
}
parsed.delete();
}
Aggregations