use of ml.shifu.shifu.container.CaseScoreResult in project shifu by ShifuML.
the class DataPrepareWorker method convertModelResultIntoColScore.
/**
 * Regroups case-oriented scoring results into per-column score lists.
 * <p>
 * For each scored case the raw input record is converted into a map that is looked up by column name. Then, for
 * every column flagged as final-selected, a {@code ColumnScoreObject} holding that column's raw value plus the
 * case's scores (all scores, max, min and avg) is appended to the list stored under the column number. The list
 * is created and registered the first time a column number is seen.
 *
 * @param scoreResultList
 *            scored cases to convert
 * @param columnScoreListMap
 *            output map of (column number, list of {@code ColumnScoreObject}); it is updated in place
 */
private void convertModelResultIntoColScore(List<CaseScoreResult> scoreResultList,
        Map<Integer, List<ColumnScoreObject>> columnScoreListMap) {
    for (CaseScoreResult caseScore : scoreResultList) {
        Map<String, String> rawValues = CommonUtils.convertDataIntoMap(caseScore.getInputData(),
                super.modelConfig.getDataSetDelimiter(), this.trainDataHeader);

        for (ColumnConfig columnConfig : columnConfigList) {
            // only final-selected columns contribute to the column-based view
            if (!columnConfig.isFinalSelect()) {
                continue;
            }

            ColumnScoreObject entry = new ColumnScoreObject(columnConfig.getColumnNum(),
                    rawValues.get(columnConfig.getColumnName()));
            entry.setScores(caseScore.getScores());
            entry.setMaxScore(caseScore.getMaxScore());
            entry.setMinScore(caseScore.getMinScore());
            entry.setAvgScore(caseScore.getAvgScore());

            // lazily create the per-column bucket on first use
            List<ColumnScoreObject> bucket = columnScoreListMap.get(columnConfig.getColumnNum());
            if (bucket == null) {
                bucket = new ArrayList<ColumnScoreObject>();
                columnScoreListMap.put(columnConfig.getColumnNum(), bucket);
            }
            bucket.add(entry);
        }
    }
}
use of ml.shifu.shifu.container.CaseScoreResult in project shifu by ShifuML.
the class ModelRunner method computeNsData.
/**
 * Run model to compute score for input NS Data map.
 * <p>
 * The main scorer (when set) fills the scores, max/min/avg/median score and hidden layer scores of the result.
 * Every configured sub scorer is then run on the same input and its non-null score is added to the result under
 * the sub model name. Sub scorers run through the executor manager when multi-threading is enabled and there is
 * more than one of them; otherwise they run one by one in the calling thread.
 * <p>
 * In the sequential path a failing sub scorer does not abort scoring: its score is skipped and the failure is
 * logged.
 *
 * @param rawDataNsMap
 *            - the original input, but key is wrapped by NSColumn
 * @return CaseScoreResult - model score; {@code null} if the input map is empty or the main scorer returns no
 *         score
 */
public CaseScoreResult computeNsData(final Map<NSColumn, String> rawDataNsMap) {
    if (MapUtils.isEmpty(rawDataNsMap)) {
        return null;
    }
    CaseScoreResult scoreResult = new CaseScoreResult();
    if (this.scorer != null) {
        ScoreObject so = scorer.scoreNsData(rawDataNsMap);
        if (so == null) {
            return null;
        }
        scoreResult.setScores(so.getScores());
        scoreResult.setMaxScore(so.getMaxScore());
        scoreResult.setMinScore(so.getMinScore());
        scoreResult.setAvgScore(so.getMeanScore());
        scoreResult.setMedianScore(so.getMedianScore());
        scoreResult.setHiddenLayerScores(so.getHiddenLayerScores());
    }
    if (MapUtils.isNotEmpty(this.subScorers)) {
        // a thread pool is only worth it when there is more than one sub model to score
        final boolean runInParallel = this.isMultiThread && this.subScorers.size() > 1;
        if (runInParallel && this.executorManager == null) {
            int threadPoolSize = Math.min(Runtime.getRuntime().availableProcessors(), this.subScorers.size());
            this.executorManager = new ExecutorManager<Pair<String, ScoreObject>>(threadPoolSize);
            // add a shutdown hook as a safe guard if some one not call close
            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
                @Override
                public void run() {
                    ModelRunner.this.executorManager.forceShutDown();
                }
            }));
            log.info("MultiThread is enabled in ModelRunner, threadPoolSize = " + threadPoolSize);
        }
        List<Callable<Pair<String, ScoreObject>>> tasks = new ArrayList<Callable<Pair<String, ScoreObject>>>(
                this.subScorers.size());
        for (final Map.Entry<String, Scorer> entry : this.subScorers.entrySet()) {
            tasks.add(new Callable<Pair<String, ScoreObject>>() {
                @Override
                public Pair<String, ScoreObject> call() {
                    ScoreObject so = entry.getValue().scoreNsData(rawDataNsMap);
                    // a null pair tells the caller this sub model produced no score
                    return (so != null) ? Pair.of(entry.getKey(), so) : null;
                }
            });
        }
        if (runInParallel) {
            List<Pair<String, ScoreObject>> results = this.executorManager.submitTasksAndWaitResults(tasks);
            for (Pair<String, ScoreObject> result : results) {
                if (result != null) {
                    scoreResult.addSubModelScore(result.getLeft(), result.getRight());
                }
            }
        } else {
            for (Callable<Pair<String, ScoreObject>> task : tasks) {
                Pair<String, ScoreObject> result = null;
                try {
                    result = task.call();
                } catch (Exception e) {
                    // best effort: keep scoring the remaining sub models, but leave a trace of the failure
                    log.error("Fail to score sub model in ModelRunner, its score is skipped.", e);
                }
                if (result != null) {
                    scoreResult.addSubModelScore(result.getLeft(), result.getRight());
                }
            }
        }
    }
    return scoreResult;
}
use of ml.shifu.shifu.container.CaseScoreResult in project shifu by ShifuML.
the class EvalNormUDF method exec.
/**
 * Builds the normalized output tuple for one input record and optionally appends a model score.
 * <p>
 * Output layout, driven by position in {@code outputNames}: position 0 is copied as the raw value, position 1 is
 * the raw value or {@code "1"} when it is empty, positions in [2, 2 + validMetaSize) are copied as raw meta
 * values, and every later position is normalized (preceded by the raw value when {@code isOutputRaw} is set).
 * When {@code isAppendScore} is set, a score is appended last; {@code -999.0} is used when there is no model or
 * no score.
 *
 * @param input
 *            the input record
 * @return the output tuple, or {@code null} when the record is a CSV header line or yields an empty data map
 * @throws IOException
 *             declared by the signature for errors while reading the input or loading models
 */
public Tuple exec(Tuple input) throws IOException {
    if (isCsvFormat) {
        Object firstField = input.get(0);
        String firstCol = (firstField == null) ? "" : firstField.toString();
        if (this.headers[0].equals(CommonUtils.normColumnName(firstCol))) {
            // TODO what to do if the column value == column name? ...
            return null;
        }
    }

    if (this.isAppendScore && this.modelRunner == null) {
        // here to initialize modelRunner, this is moved from constructor to here to avoid OOM in client side.
        // UDF in pig client will be initialized to get some metadata issues
        @SuppressWarnings("deprecation")
        List<BasicML> loadedModels = ModelSpecLoaderUtils.loadBasicModels(modelConfig, evalConfig,
                evalConfig.getDataSet().getSource(), evalConfig.getGbtConvertToProb(),
                evalConfig.getGbtScoreConvertStrategy());
        this.modelRunner = new ModelRunner(modelConfig, columnConfigList, this.headers,
                evalConfig.getDataSet().getDataDelimiter(), loadedModels);
        this.modelRunner.setScoreScale(Integer.parseInt(this.scale));
    }

    Map<NSColumn, String> nsData = CommonUtils.convertDataIntoNsMap(input, this.headers, this.segFilterSize);
    if (MapUtils.isEmpty(nsData)) {
        return null;
    }

    Tuple output = TupleFactory.getInstance().newTuple();
    int outputCount = this.outputNames.size();
    for (int idx = 0; idx < outputCount; idx++) {
        String columnName = this.outputNames.get(idx);
        String rawValue = nsData.get(new NSColumn(columnName));

        if (idx == 1) {
            // an empty value at position 1 falls back to "1"
            if (StringUtils.isEmpty(rawValue)) {
                output.append("1");
            } else {
                output.append(rawValue);
            }
        } else if (idx == 0 || idx < 2 + validMetaSize) {
            // position 0 and the meta columns in [2, 2 + validMetaSize) are passed through untouched
            output.append(rawValue);
        } else {
            ColumnConfig columnConfig = this.columnConfigMap.get(columnName);
            List<Double> normalized = Normalizer.normalize(columnConfig, rawValue,
                    this.modelConfig.getNormalizeStdDevCutOff(), this.modelConfig.getNormalizeType());
            if (this.isOutputRaw) {
                output.append(rawValue);
            }
            for (Double value : normalized) {
                output.append(getOutputValue(value, true));
            }
        }
    }

    if (!this.isAppendScore || this.modelRunner == null) {
        return output;
    }

    CaseScoreResult score = this.modelRunner.computeNsData(nsData);
    if (this.modelRunner == null || this.modelRunner.getModelsCnt() == 0 || score == null) {
        // placeholder when no model is loaded or scoring produced nothing
        output.append(-999.0);
    } else if (this.scIndex < 0) {
        output.append(score.getAvgScore());
    } else {
        output.append(score.getScores().get(this.scIndex));
    }
    return output;
}
use of ml.shifu.shifu.container.CaseScoreResult in project shifu by ShifuML.
the class FullScoreUDF method exec.
/**
 * Scores one input record and emits the full set of scores followed by the meta column values.
 * <p>
 * The output tuple holds, in order: the avg score, the max score, the min score, every individual score, and
 * then the raw value of each meta column configured in the model config.
 *
 * @param input
 *            the input record
 * @return the score tuple, or {@code null} (after logging an error) when the model runner returns no result
 * @throws IOException
 *             declared by the signature for errors while reading the input
 */
public Tuple exec(Tuple input) throws IOException {
    Map<NSColumn, String> nsData = CommonUtils.convertDataIntoNsMap(input, this.header, 0);

    CaseScoreResult caseScore = modelRunner.computeNsData(nsData);
    if (caseScore == null) {
        log.error("Get null result.");
        return null;
    }

    Tuple output = TupleFactory.getInstance().newTuple();

    // summary scores first, then each individual score
    output.append(caseScore.getAvgScore());
    output.append(caseScore.getMaxScore());
    output.append(caseScore.getMinScore());
    for (double modelScore : caseScore.getScores()) {
        output.append(modelScore);
    }

    // meta columns are copied from the raw input, keyed by their NSColumn name
    for (String metaName : modelConfig.getMetaColumnNames()) {
        output.append(nsData.get(new NSColumn(metaName)));
    }
    return output;
}
Aggregations