use of water.util.TwoDimTable in project h2o-3 by h2oai.
the class ModelMetricsMultinomial method getHitRatioTable.
/**
 * Builds a single-column "Hit Ratio" table with one row per entry of {@code hits}.
 * Row headers are the 1-based positions ("1", "2", ...) shown under the row-header column "K".
 *
 * @param hits values to store; entry {@code k} is written to row {@code k}, column 0
 * @return the populated table, titled "Top-&lt;hits.length&gt; Hit Ratios"
 */
public static TwoDimTable getHitRatioTable(float[] hits) {
    final int topK = hits.length;

    // Row labels are 1-based ranks.
    String[] ranks = new String[topK];
    for (int i = 0; i < topK; i++) {
        ranks[i] = String.valueOf(i + 1);
    }

    TwoDimTable hitRatios = new TwoDimTable(
            "Top-" + topK + " Hit Ratios",
            null, // no table description
            ranks,
            new String[] { "Hit Ratio" },
            new String[] { "float" },
            new String[] { "%f" },
            "K");

    for (int i = 0; i < topK; i++) {
        hitRatios.set(i, 0, hits[i]);
    }
    return hitRatios;
}
use of water.util.TwoDimTable in project h2o-3 by h2oai.
the class GainsLift method createTwoDimTable.
/**
 * Builds the "Gains/Lift Table" from this object's per-group {@code events},
 * {@code observations}, {@code response_rates} and {@code _quantiles}, stores it in
 * {@code this.table} and returns it.
 *
 * With assertions enabled, the last group is checked for consistency: the cumulative
 * counts must match the totals, and the cumulative lift / response rate must match
 * the expected values within 1e-8.
 *
 * @return the populated table, or {@code null} when {@code response_rates} is null or
 *         {@code avg_response_rate} is NaN
 */
public TwoDimTable createTwoDimTable() {
    if (response_rates == null || Double.isNaN(avg_response_rate)) {
        return null;
    }

    final String description = "Avg response rate: " + PrettyPrint.formatPct(avg_response_rate);
    final String[] rowHeaders = new String[events.length];
    final String[] headers = {
        "Group", "Cumulative Data Fraction", "Lower Threshold", "Lift", "Cumulative Lift",
        "Response Rate", "Cumulative Response Rate", "Capture Rate", "Cumulative Capture Rate",
        "Gain", "Cumulative Gain"
    };
    final String[] types = {
        "int", "double", "double", "double", "double", "double",
        "double", "double", "double", "double", "double"
    };
    final String[] formats = {
        "%d", "%.8f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f", "%5f"
    };
    TwoDimTable result = new TwoDimTable("Gains/Lift Table", description, rowHeaders, headers, types, formats, "");

    final double avgRate = avg_response_rate;                  // P = E/N
    final long totalObs = ArrayUtils.sum(observations);        // N
    final long totalEvents = Math.round(totalObs * avgRate);   // E

    long cumEvents = 0;
    long cumObs = 0;
    final int groups = events.length;
    for (int g = 0; g < groups; g++) {
        final long groupEvents = events[g];
        final long groupObs = observations[g];
        final double groupRate = response_rates[g];
        cumEvents += groupEvents;
        cumObs += groupObs;

        final double cumRate = (double) cumEvents / cumObs;
        // Both lifts can be NaN if avgRate == 0.
        final double lift = groupRate / avgRate;
        final double cumLift = cumRate / avgRate;

        result.set(g, 0, g + 1);                                   // group
        result.set(g, 1, (double) cumObs / totalObs);              // cumulative_data_fraction
        result.set(g, 2, _quantiles[g]);                           // lower_threshold
        result.set(g, 3, lift);                                    // lift
        result.set(g, 4, cumLift);                                 // cumulative_lift
        result.set(g, 5, groupRate);                               // response_rate
        result.set(g, 6, cumRate);                                 // cumulative_response_rate
        result.set(g, 7, (double) groupEvents / totalEvents);      // capture_rate
        result.set(g, 8, (double) cumEvents / totalEvents);        // cumulative_capture_rate
        result.set(g, 9, 100 * (lift - 1));                        // gain
        result.set(g, 10, 100 * (cumLift - 1));                    // cumulative gain

        final boolean lastGroup = (g == groups - 1);
        if (lastGroup) {
            assert (cumObs == totalObs) : "Cumulative data fraction must be 1.0, but is " + ((double) cumObs / totalObs);
            assert (cumEvents == totalEvents) : "Cumulative capture rate must be 1.0, but is " + ((double) cumEvents / totalEvents);
            if (!Double.isNaN(cumLift)) {
                assert (Math.abs(cumLift - 1.0) < 1e-8) : "Cumulative lift must be 1.0, but is " + cumLift;
            }
            assert (Math.abs(cumRate - avg_response_rate) < 1e-8) : "Cumulative response rate must be " + avg_response_rate + ", but is " + cumRate;
        }
    }

    this.table = result;
    return result;
}
use of water.util.TwoDimTable in project h2o-3 by h2oai.
the class DimensionReductionUtils method createScoringHistoryTableDR.
/**
 * Creates the scoring history table for dimension reduction algorithms such as PCA/SVD.
 *
 * Assumptions about {@code scoreTable}:
 * <ol>
 *   <li>It contains a "Timestamp" entry whose values are {@code long} times in ms.</li>
 *   <li>Every other entry holds {@code double} values, with at least as many values as
 *       there are timestamps.</li>
 * </ol>
 *
 * The resulting table always starts with the columns "Timestamp", "Duration" and
 * "Iteration" (the latter two are generated automatically), followed by the remaining
 * entries in the map's iteration order. This holds regardless of where "Timestamp"
 * appears in the map, so headers and values always line up.
 *
 * @param scoreTable map from column header to the history of values collected for it
 * @param tableName title/name of the scoring table
 * @param startTime time (ms) the model building job was first started; used to derive "Duration"
 * @return TwoDimTable containing the scoring history, one row per timestamp
 * @throws IllegalArgumentException if {@code scoreTable} has no "Timestamp" entry
 */
public static TwoDimTable createScoringHistoryTableDR(LinkedHashMap<String, ArrayList> scoreTable, String tableName, long startTime) {
    final ArrayList timestamps = scoreTable.get("Timestamp");
    if (timestamps == null) {
        throw new IllegalArgumentException("scoreTable must contain a \"Timestamp\" entry");
    }

    List<String> colHeaders = new ArrayList<>();
    List<String> colTypes = new ArrayList<>();
    List<String> colFormat = new ArrayList<>();

    // The three fixed columns always come first, matching the order the row values are written below.
    colHeaders.add("Timestamp");
    colTypes.add("string");
    colFormat.add("%s");
    colHeaders.add("Duration");
    colTypes.add("string");
    colFormat.add("%s");
    colHeaders.add("Iteration");
    colTypes.add("long");
    colFormat.add("%d");

    // Every other field becomes a double column, in map iteration order.
    List<String> otherTableEntries = new ArrayList<>();
    for (String fieldName : scoreTable.keySet()) {
        if (fieldName.equals("Timestamp")) {
            continue;
        }
        otherTableEntries.add(fieldName);
        colHeaders.add(fieldName);
        colTypes.add("double");
        colFormat.add("%.5f");
    }

    // number of entries of training history
    final int rows = timestamps.size();
    TwoDimTable table = new TwoDimTable(tableName, null, new String[rows], colHeaders.toArray(new String[0]), colTypes.toArray(new String[0]), colFormat.toArray(new String[0]), "");
    assert (rows <= table.getRowDim());

    // The formatter does not depend on the row, so build it once.
    final DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
    for (int row = 0; row < rows; row++) {
        int col = 0;
        final long timestampMs = (long) timestamps.get(row);
        // take care of Timestamp, Duration, Iteration.
        table.set(row, col++, fmt.print(timestampMs));
        table.set(row, col++, PrettyPrint.msecs(timestampMs - startTime, true));
        table.set(row, col++, row);
        // take care of the extra fields
        for (String fieldName : otherTableEntries) {
            table.set(row, col++, (double) scoreTable.get(fieldName).get(row));
        }
    }
    return table;
}
use of water.util.TwoDimTable in project h2o-3 by h2oai.
the class PartialDependenceTest method weatherBinary.
/**
 * Trains a GBM on the weather data set with "RainTomorrow" as the response, runs
 * partial dependence with 33 bins on three columns and logs each resulting table.
 * The test makes no assertions; it only requires the run to complete without throwing.
 * The frame, model and partial dependence object are removed afterwards.
 */
@Test
public void weatherBinary() {
    Frame frame = null;
    GBMModel gbm = null;
    PartialDependence pdp = null;
    try {
        // Frame
        frame = parse_test_file("smalldata/junit/weather.csv");

        // Model
        GBMModel.GBMParameters params = new GBMModel.GBMParameters();
        params._train = frame._key;
        params._response_column = "RainTomorrow";
        params._ignored_columns = new String[] { "Date", "RISK_MM", "EvapMM" };
        gbm = new GBM(params).trainModel().get();

        // PartialDependence
        pdp = new PartialDependence(Key.<PartialDependence>make());
        pdp._model_id = (Key) gbm._key;
        pdp._frame_id = frame._key;
        pdp._cols = new String[] { "Sunshine", "MaxWindPeriod", "WindSpeed9am" };
        pdp._nbins = 33;
        pdp.execImpl().get();

        for (TwoDimTable pdTable : pdp._partial_dependence_data) {
            Log.info(pdTable);
        }
    } finally {
        // Clean up whatever was created, in creation order.
        if (frame != null) {
            frame.remove();
        }
        if (gbm != null) {
            gbm.remove();
        }
        if (pdp != null) {
            pdp.remove();
        }
    }
}
use of water.util.TwoDimTable in project h2o-3 by h2oai.
the class PartialDependenceTest method prostateBinaryPickCols.
/**
 * Trains a GBM on the prostate data set (with five columns converted to categorical and
 * "CAPSULE" as the response), runs partial dependence with 10 bins on the two manually
 * picked columns "DPROS" and "GLEASON", logs each resulting table and checks that exactly
 * two tables were produced. The frame, model and partial dependence object are removed
 * afterwards.
 */
@Test
public void prostateBinaryPickCols() {
    Frame fr = null;
    GBMModel model = null;
    PartialDependence partialDependence = null;
    try {
        // Frame
        fr = parse_test_file("smalldata/prostate/prostate.csv");
        // Replace each listed column by its categorical version and drop the original vec.
        for (String s : new String[] { "RACE", "GLEASON", "DPROS", "DCAPS", "CAPSULE" }) {
            Vec v = fr.remove(s);
            fr.add(s, v.toCategoricalVec());
            v.remove();
        }
        // Publish the modified frame.
        DKV.put(fr);
        // Model
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = fr._key;
        parms._ignored_columns = new String[] { "ID" };
        parms._response_column = "CAPSULE";
        model = new GBM(parms).trainModel().get();
        // PartialDependence
        partialDependence = new PartialDependence(Key.<PartialDependence>make());
        // pick columns manually
        partialDependence._cols = new String[] { "DPROS", "GLEASON" };
        partialDependence._nbins = 10;
        partialDependence._model_id = (Key) model._key;
        partialDependence._frame_id = fr._key;
        partialDependence.execImpl().get();
        for (TwoDimTable t : partialDependence._partial_dependence_data) {
            Log.info(t);
        }
        // assertEquals reports the actual table count on failure, unlike assertTrue(a == b).
        Assert.assertEquals(2, partialDependence._partial_dependence_data.length);
    } finally {
        if (fr != null) {
            fr.remove();
        }
        if (model != null) {
            model.remove();
        }
        if (partialDependence != null) {
            partialDependence.remove();
        }
    }
}
Aggregations