Use of ml.shifu.shifu.container.ScoreObject in project shifu by ShifuML.
Class Scorer, method scoreNsData.
/**
 * Scores one record against every model in {@code models}.
 *
 * <p>For each model a {@link Callable} producing the raw model output is built. In multi-thread
 * mode the callables are queued and executed through {@code executorManager}; otherwise each one
 * is invoked inline as soon as it is built. The raw outputs are then converted into scores
 * according to the model type.
 *
 * <p>Neural-network models do not use {@code inputPair}: their input is assembled from
 * {@code rawNsDataMap} (per feature set for float networks, cached in {@code cachedNormDataPair}
 * for the duration of this call).
 *
 * @param inputPair
 *            pre-assembled input; when {@code null} and the algorithm is not NN it is assembled
 *            from {@code rawNsDataMap}
 * @param rawNsDataMap
 *            raw input values keyed by {@link NSColumn}
 * @return the scores (plus tree sizes and, if requested, hidden layer outputs), or {@code null}
 *         when the number of evaluated models does not match the number of models
 * @throws RuntimeException
 *             if a model type is not supported or a tree model's input size does not match the
 *             assembled input
 */
public ScoreObject scoreNsData(MLDataPair inputPair, Map<NSColumn, String> rawNsDataMap) {
    if (inputPair == null && !this.alg.equalsIgnoreCase(NNConstants.NN_ALG_NAME)) {
        inputPair = NormalUtils.assembleNsDataPair(binCategoryMap, noVarSelect, modelConfig, selectedColumnConfigList, rawNsDataMap, cutoff, alg);
    }
    // clear cache
    this.cachedNormDataPair.clear();
    final MLDataPair pair = inputPair;
    List<MLData> modelResults = new ArrayList<MLData>();
    List<Callable<MLData>> tasks = null;
    if (this.multiThread) {
        tasks = new ArrayList<Callable<MLData>>();
    }
    for (final BasicML model : models) {
        // Every branch either builds the evaluation callable, skips the model, or throws.
        final Callable<MLData> callable;
        if (model instanceof BasicFloatNetwork || model instanceof NNModel) {
            final BasicFloatNetwork network = (model instanceof BasicFloatNetwork) ? (BasicFloatNetwork) model : ((NNModel) model).getIndependentNNModel().getBasicNetworks().get(0);
            // Networks sharing a feature set share one normalized input within this call.
            String cacheKey = featureSetToString(network.getFeatureSet());
            MLDataPair dataPair = cachedNormDataPair.get(cacheKey);
            if (dataPair == null) {
                dataPair = NormalUtils.assembleNsDataPair(binCategoryMap, noVarSelect, modelConfig, selectedColumnConfigList, rawNsDataMap, cutoff, alg, network.getFeatureSet());
                cachedNormDataPair.put(cacheKey, dataPair);
            }
            final MLDataPair networkPair = dataPair;
            // No network/input size check is enforced here; the sizes are only logged,
            // sampled on the clock so the log is not flooded.
            if (System.currentTimeMillis() % 1000 == 0L) {
                log.info("Network input count = {}, while input size = {}", network.getInputCount(), networkPair.getInput().size());
            }
            final int fnlOutputHiddenLayerIndex = outputHiddenLayerIndex;
            callable = new Callable<MLData>() {
                @Override
                public MLData call() {
                    MLData finalOutput = network.compute(networkPair.getInput());
                    if (fnlOutputHiddenLayerIndex == 0) {
                        return finalOutput;
                    }
                    // append output values in hidden layer
                    double[] hiddenOutputs = network.getLayerOutput(fnlOutputHiddenLayerIndex);
                    double[] outputs = new double[finalOutput.getData().length + hiddenOutputs.length];
                    System.arraycopy(finalOutput.getData(), 0, outputs, 0, finalOutput.getData().length);
                    System.arraycopy(hiddenOutputs, 0, outputs, finalOutput.getData().length, hiddenOutputs.length);
                    return new BasicMLData(outputs);
                }
            };
        } else if (model instanceof BasicNetwork) {
            final BasicNetwork network = (BasicNetwork) model;
            final MLDataPair networkPair = NormalUtils.assembleNsDataPair(binCategoryMap, noVarSelect, modelConfig, columnConfigList, rawNsDataMap, cutoff, alg, null);
            callable = new Callable<MLData>() {
                @Override
                public MLData call() {
                    return network.compute(networkPair.getInput());
                }
            };
        } else if (model instanceof SVM) {
            final SVM svm = (SVM) model;
            if (svm.getInputCount() != pair.getInput().size()) {
                log.error("SVM and input size mismatch: SVM Size = " + svm.getInputCount() + "; Input Size = " + pair.getInput().size());
                continue;
            }
            callable = new Callable<MLData>() {
                @Override
                public MLData call() {
                    return svm.compute(pair.getInput());
                }
            };
        } else if (model instanceof LR) {
            final LR lr = (LR) model;
            if (lr.getInputCount() != pair.getInput().size()) {
                log.error("LR and input size mismatch: LR Size = " + lr.getInputCount() + "; Input Size = " + pair.getInput().size());
                continue;
            }
            callable = new Callable<MLData>() {
                @Override
                public MLData call() {
                    return lr.compute(pair.getInput());
                }
            };
        } else if (model instanceof TreeModel) {
            final TreeModel tm = (TreeModel) model;
            if (tm.getInputCount() != pair.getInput().size()) {
                throw new RuntimeException("GBDT and input size mismatch: tm input Size = " + tm.getInputCount() + "; data input Size = " + pair.getInput().size());
            }
            callable = new Callable<MLData>() {
                @Override
                public MLData call() {
                    return tm.compute(pair.getInput());
                }
            };
        } else if (model instanceof GenericModel) {
            callable = new Callable<MLData>() {
                @Override
                public MLData call() {
                    return ((GenericModel) model).compute(pair.getInput());
                }
            };
        } else {
            throw new RuntimeException("unsupport models");
        }
        evaluateOrEnqueue(callable, tasks, modelResults);
    }
    List<Double> scores = new ArrayList<Double>();
    List<Integer> rfTreeSizeList = new ArrayList<Integer>();
    SortedMap<String, Double> hiddenOutputs = null;
    if (CollectionUtils.isNotEmpty(modelResults) || CollectionUtils.isNotEmpty(tasks)) {
        // A skipped or failed model leaves fewer results/tasks than models; results are matched
        // to models by index below, so a short list cannot be scored.
        int modelSize = modelResults.size() > 0 ? modelResults.size() : tasks.size();
        if (modelSize != this.models.size()) {
            log.error("Get model results size doesn't match with models size.");
            return null;
        }
        if (multiThread) {
            modelResults = this.executorManager.submitTasksAndWaitResults(tasks);
        }
        // otherwise modelResults was already filled inline by evaluateOrEnqueue
        if (this.outputHiddenLayerIndex != 0) {
            // Keys look like "model_<modelIdx>_<layerIdx>_<nodeIdx>"; order them numerically by
            // those three indices instead of lexicographically.
            hiddenOutputs = new TreeMap<String, Double>(new Comparator<String>() {
                @Override
                public int compare(String o1, String o2) {
                    String[] split1 = o1.split("_");
                    String[] split2 = o2.split("_");
                    for (int part = 1; part <= 3; part++) {
                        int left = Integer.parseInt(split1[part]);
                        int right = Integer.parseInt(split2[part]);
                        if (left != right) {
                            return left < right ? -1 : 1;
                        }
                    }
                    return 0;
                }
            });
        }
        for (int i = 0; i < this.models.size(); i++) {
            BasicML model = this.models.get(i);
            MLData score = modelResults.get(i);
            if (model instanceof BasicNetwork || model instanceof NNModel) {
                if (modelConfig != null && modelConfig.isRegression()) {
                    scores.add(toScore(score.getData(0)));
                    if (this.outputHiddenLayerIndex != 0) {
                        // index 0 is the final output; the rest are the appended hidden layer outputs
                        for (int j = 1; j < score.getData().length; j++) {
                            hiddenOutputs.put("model_" + i + "_" + this.outputHiddenLayerIndex + "_" + (j - 1), score.getData()[j]);
                        }
                    }
                } else if (modelConfig != null && modelConfig.isClassification() && modelConfig.getTrain().isOneVsAll()) {
                    // if one vs all classification
                    scores.add(toScore(score.getData(0)));
                } else {
                    for (double d : score.getData()) {
                        scores.add(toScore(d));
                    }
                }
            } else if (model instanceof SVM || model instanceof LR) {
                scores.add(toScore(score.getData(0)));
            } else if (model instanceof TreeModel) {
                if (modelConfig.isClassification() && !modelConfig.getTrain().isOneVsAll()) {
                    // raw per-class outputs are kept as-is (not passed through toScore)
                    for (double sc : score.getData()) {
                        scores.add(sc);
                    }
                } else {
                    // if one vs all multiple classification or regression
                    scores.add(toScore(score.getData(0)));
                }
                final TreeModel tm = (TreeModel) model;
                // regression for RF
                if (!tm.isClassfication() && !tm.isGBDT()) {
                    rfTreeSizeList.add(tm.getTrees().size());
                }
            } else if (model instanceof GenericModel) {
                scores.add(toScore(score.getData(0)));
            } else {
                throw new RuntimeException("unsupport models");
            }
        }
    }
    Integer tag = Constants.DEFAULT_IDEAL_VALUE;
    // sampled on the clock so a stream of empty results does not flood the log
    if (scores.size() == 0 && System.currentTimeMillis() % 100 == 0) {
        log.warn("No Scores Calculated...");
    }
    return new ScoreObject(scores, tag, rfTreeSizeList, hiddenOutputs);
}

/**
 * Queues the model evaluation in multi-thread mode, otherwise runs it immediately.
 *
 * <p>When run inline, a failing evaluation is logged and contributes no result.
 *
 * @param callable
 *            the evaluation of a single model
 * @param tasks
 *            queue of pending evaluations; only used (and only non-null) in multi-thread mode
 * @param modelResults
 *            receives the result when the evaluation is run inline
 */
private void evaluateOrEnqueue(Callable<MLData> callable, List<Callable<MLData>> tasks, List<MLData> modelResults) {
    if (this.multiThread) {
        tasks.add(callable);
        return;
    }
    try {
        modelResults.add(callable.call());
    } catch (Exception e) {
        log.error("error in model evaluation", e);
    }
}
Use of ml.shifu.shifu.container.ScoreObject in project shifu by ShifuML.
Class ScorerTest, method scoreTest.
// @Test
public void scoreTest() {
    // Two final-selected columns of type N, named "A" and "B", at positions 0 and 1.
    final String[] columnNames = { "A", "B" };
    List<ColumnConfig> columns = new ArrayList<ColumnConfig>();
    for (int idx = 0; idx < columnNames.length; idx++) {
        ColumnConfig column = new ColumnConfig();
        column.setColumnType(ColumnType.N);
        column.setColumnName(columnNames[idx]);
        column.setColumnNum(idx);
        column.setFinalSelect(true);
        columns.add(column);
    }

    Scorer scorer = new Scorer(models, columns, "NN", modelConfig);

    // One record: inputs (0, 0) with ideal output 1.
    BasicMLData inputData = new BasicMLData(new double[] { 0., 0. });
    BasicMLData idealData = new BasicMLData(new double[] { 1. });
    MLDataPair pair = new BasicMLDataPair(inputData, idealData);

    ScoreObject result = scorer.score(pair, null);
    List<Double> scores = result.getScores();
    Assert.assertTrue(scores.get(0) > 400);
    Assert.assertTrue(scores.get(1) == 1000);
}
Use of ml.shifu.shifu.container.ScoreObject in project shifu by ShifuML.
Class ModelRunner, method computeNsData.
/**
 * Run model to compute score for input NS Data map.
 *
 * <p>The main scorer (if any) fills the overall score statistics; each sub-scorer (if any)
 * contributes a named sub-model score. Sub-scorers run through a lazily created thread pool when
 * multi-threading is enabled and there is more than one of them, otherwise sequentially. A
 * sub-scorer that yields no score object is left out of the result; in the sequential path a
 * sub-scorer that throws is logged and left out as well.
 *
 * @param rawDataNsMap
 *            - the original input, but key is wrapped by NSColumn
 * @return CaseScoreResult - model score, or {@code null} if the input map is empty or the main
 *         scorer returns no score object
 */
public CaseScoreResult computeNsData(final Map<NSColumn, String> rawDataNsMap) {
    if (MapUtils.isEmpty(rawDataNsMap)) {
        return null;
    }
    CaseScoreResult scoreResult = new CaseScoreResult();
    if (this.scorer != null) {
        ScoreObject so = scorer.scoreNsData(rawDataNsMap);
        if (so == null) {
            return null;
        }
        scoreResult.setScores(so.getScores());
        scoreResult.setMaxScore(so.getMaxScore());
        scoreResult.setMinScore(so.getMinScore());
        scoreResult.setAvgScore(so.getMeanScore());
        scoreResult.setMedianScore(so.getMedianScore());
        scoreResult.setHiddenLayerScores(so.getHiddenLayerScores());
    }
    if (MapUtils.isNotEmpty(this.subScorers)) {
        final boolean runInParallel = this.isMultiThread && this.subScorers.size() > 1;
        if (runInParallel && this.executorManager == null) {
            int threadPoolSize = Math.min(Runtime.getRuntime().availableProcessors(), this.subScorers.size());
            this.executorManager = new ExecutorManager<Pair<String, ScoreObject>>(threadPoolSize);
            // add a shutdown hook as a safe guard if some one not call close
            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
                @Override
                public void run() {
                    ModelRunner.this.executorManager.forceShutDown();
                }
            }));
            log.info("MultiThread is enabled in ModelRunner, threadPoolSize = " + threadPoolSize);
        }
        // One task per sub-scorer; a task returns null when its scorer produces no score object.
        List<Callable<Pair<String, ScoreObject>>> tasks = new ArrayList<Callable<Pair<String, ScoreObject>>>(this.subScorers.size());
        for (final Map.Entry<String, Scorer> entry : this.subScorers.entrySet()) {
            tasks.add(new Callable<Pair<String, ScoreObject>>() {
                @Override
                public Pair<String, ScoreObject> call() {
                    ScoreObject so = entry.getValue().scoreNsData(rawDataNsMap);
                    return (so != null) ? Pair.of(entry.getKey(), so) : null;
                }
            });
        }
        if (runInParallel) {
            List<Pair<String, ScoreObject>> results = this.executorManager.submitTasksAndWaitResults(tasks);
            for (Pair<String, ScoreObject> result : results) {
                if (result != null) {
                    scoreResult.addSubModelScore(result.getLeft(), result.getRight());
                }
            }
        } else {
            for (Callable<Pair<String, ScoreObject>> task : tasks) {
                Pair<String, ScoreObject> result = null;
                try {
                    result = task.call();
                } catch (Exception e) {
                    // Best effort: a failing sub-model must not fail the whole case, but the
                    // failure should be visible instead of silently dropped.
                    log.error("Fail to score sub-model, the sub-model score is skipped.", e);
                }
                if (result != null) {
                    scoreResult.addSubModelScore(result.getLeft(), result.getRight());
                }
            }
        }
    }
    return scoreResult;
}
Aggregations