use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class PerformanceEvaluator method review.
/**
 * Reviews the evaluation confusion matrix file when the number of records is already known.
 * <p>
 * The file location is resolved through a {@link PathFinder} built from the model config, the
 * reader is wrapped in a {@code CMOIterable} and handed to the iterable-based {@code review}
 * overload together with {@code records}.
 *
 * @param records
 *            number of records expected in the confusion matrix file; {@code 0} is rejected
 * @throws IOException
 *             if the confusion matrix file cannot be opened or read
 */
public void review(long records) throws IOException {
    // Nothing to review: report it and abort before touching the file system.
    if (records == 0) {
        log.info("No result read, please check EvalConfusionMatrix file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALCONFMTR);
    }

    PathFinder finder = new PathFinder(modelConfig);
    log.info("Loading confusion matrix in {}",
            finder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()));

    // If opening fails the exception propagates and there is nothing to close.
    BufferedReader matrixReader = ShifuFileUtils.getReader(
            finder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()),
            evalConfig.getDataSet().getSource());
    try {
        review(new CMOIterable(matrixReader), records);
    } finally {
        // Always release the reader, swallowing any failure on close.
        IOUtils.closeQuietly(matrixReader);
    }
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class PerformanceEvaluator method review.
/**
 * Loads the whole evaluation confusion matrix file into memory and reviews it.
 * <p>
 * Every line is split on {@code |} and its first nine fields are parsed as doubles, in this
 * order: tp, fp, fn, tn, weighted tp, weighted fp, weighted fn, weighted tn, score.
 * <p>
 * The reader is closed in a {@code finally} block so it is released even when a line fails to
 * parse, reading fails, or the file turns out to be empty.
 *
 * @throws IOException
 *             if the confusion matrix file cannot be opened, read or closed
 * @throws ShifuException
 *             with {@link ShifuErrorCode#ERROR_EVALCONFMTR} when the file contains no lines
 */
public void review() throws IOException {
    PathFinder pathFinder = new PathFinder(modelConfig);
    log.info("Loading confusion matrix in {}", pathFinder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()));

    List<ConfusionMatrixObject> matrixList = new ArrayList<ConfusionMatrixObject>();
    BufferedReader reader = ShifuFileUtils.getReader(pathFinder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()), evalConfig.getDataSet().getSource());
    try {
        String line = null;
        while ((line = reader.readLine()) != null) {
            String[] raw = line.split("\\|");
            ConfusionMatrixObject matrix = new ConfusionMatrixObject();
            matrix.setTp(Double.parseDouble(raw[0]));
            matrix.setFp(Double.parseDouble(raw[1]));
            matrix.setFn(Double.parseDouble(raw[2]));
            matrix.setTn(Double.parseDouble(raw[3]));
            matrix.setWeightedTp(Double.parseDouble(raw[4]));
            matrix.setWeightedFp(Double.parseDouble(raw[5]));
            matrix.setWeightedFn(Double.parseDouble(raw[6]));
            matrix.setWeightedTn(Double.parseDouble(raw[7]));
            matrix.setScore(Double.parseDouble(raw[8]));
            matrixList.add(matrix);
        }
    } finally {
        // Previously the reader leaked on any exception and on the empty-file path.
        reader.close();
    }

    if (matrixList.isEmpty()) {
        log.info("No result read, please check EvalConfusionMatrix file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALCONFMTR);
    }

    // One matrix object is added per line, so the list size equals the number of lines read.
    review(matrixList, matrixList.size());
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class BasicModelProcessor method setUp.
/**
 * Prepares the processor for a Shifu step: loads and validates the model config, creates the
 * path finder, checks algorithm parameters and, for every step except {@code INIT}, loads,
 * validates, updates and saves the column config. Does nothing when already initialized.
 *
 * @param step
 *            Shifu running step
 * @throws Exception
 *             any exception in setup
 */
@SuppressWarnings("incomplete-switch")
protected void setUp(ModelStep step) throws Exception {
    if (hasInitialized()) {
        return;
    }

    // Model configuration comes first; everything below depends on it.
    loadModelConfig();
    validateModelConfig(step);
    this.pathFinder = new PathFinder(modelConfig, this.getOtherConfigs());
    checkAlgorithmParam();
    LOG.info(String.format("Training Data Soure Location: %s", modelConfig.getDataSet().getSource()));

    switch(step) {
        case INIT:
            // No column config handling for the init step.
            break;
        default:
            loadColumnConfig();
            validateColumnConfig();

            // Per the original note: the PSI and rebin variants of the stats step run after a
            // plain 'shifu stats', so the updater is asked for the strict (void) update.
            boolean strictCallVoidUpdate = false;
            if (step == ModelStep.STATS) {
                strictCallVoidUpdate = getBooleanParam(this.params, Constants.IS_COMPUTE_PSI)
                        || getBooleanParam(this.params, Constants.IS_REBIN);
            }

            // Refresh the column flags, re-validate and persist the result.
            ColumnConfigUpdater.updateColumnConfigFlags(modelConfig, columnConfigList, step, strictCallVoidUpdate);
            validateColumnConfigAfterSet();
            saveColumnConfigList();
            break;
    }

    // Segment filter expressions are only accepted for NN and LR models in these steps.
    boolean segmentCheckedStep = step == ModelStep.NORMALIZE
            || step == ModelStep.VARSELECT
            || step == ModelStep.TRAIN
            || step == ModelStep.EVAL;
    if (segmentCheckedStep) {
        List<String> segmentExpressions = this.modelConfig.getSegmentFilterExpressions();
        String algorithm = this.modelConfig.getAlgorithm();
        boolean hasSegments = segmentExpressions.size() > 0;
        if (hasSegments && !CommonUtils.isNNModel(algorithm) && !CommonUtils.isLRModel(algorithm)) {
            throw new IllegalArgumentException("Segment expression is only supported in NN or LR model, please check train:algrithm setting in ModelConfig.json.");
        }
    }
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class ScoreModelWorker method handleMsg.
/**
 * Handles a {@code RunModelResultMessage} by writing one pipe-delimited line per scored case to
 * the score writer; any other message type is passed to {@code unhandled}.
 * <p>
 * Each line holds, in order: the trimmed tag, the weight (the trimmed weight column value, or
 * {@code 1.0} when no weight column is configured), the model scores, the sub-model scores and
 * the trimmed values of the meta columns. Once every stream has delivered all of its messages
 * the writer is closed and a single {@code EvalResultMessage} is sent to the next actor.
 *
 * @param message
 *            the incoming actor message
 * @throws IOException
 *             if writing to or closing the score writer fails
 */
@Override
public void handleMsg(Object message) throws IOException {
    if (!(message instanceof RunModelResultMessage)) {
        unhandled(message);
        return;
    }

    log.debug("Received model score data for evaluation");
    RunModelResultMessage msg = (RunModelResultMessage) message;

    // First message of a stream: start tracking it.
    if (!resultMap.containsKey(msg.getStreamId())) {
        receivedStreamCnt++;
        resultMap.put(msg.getStreamId(), new StreamBulletin(msg.getStreamId()));
    }
    resultMap.get(msg.getStreamId()).receiveMsge(msg.getMsgId(), msg.isLastMsg());

    StringBuilder line = new StringBuilder();
    for (CaseScoreResult scored : msg.getScoreResultList()) {
        line.setLength(0);

        Map<String, String> rawDataMap = CommonUtils.convertDataIntoMap(scored.getInputData(),
                evalConfig.getDataSet().getDataDelimiter(), header);

        // Tag column first.
        line.append(CommonUtils.trimTag(rawDataMap.get(modelConfig.getTargetColumnName(evalConfig))));

        // Then the weight, defaulting to 1.0 when no weight column is configured.
        String weight = StringUtils.isNotBlank(evalConfig.getDataSet().getWeightColumnName())
                ? StringUtils.trimToEmpty(rawDataMap.get(evalConfig.getDataSet().getWeightColumnName()))
                : "1.0";
        line.append("|").append(weight);

        // Scores of the main model(s), if any.
        if (CollectionUtils.isNotEmpty(scored.getScores())) {
            addModelScoreData(line, scored);
        }

        // Scores of every sub-model, in the map's iteration order.
        Map<String, CaseScoreResult> subModelScores = scored.getSubModelScores();
        if (MapUtils.isNotEmpty(subModelScores)) {
            for (CaseScoreResult subModelResult : subModelScores.values()) {
                addModelScoreData(line, subModelResult);
            }
        }

        // Finally the meta columns.
        List<String> metaColumns = evalConfig.getAllMetaColumns(modelConfig);
        if (CollectionUtils.isNotEmpty(metaColumns)) {
            for (String metaColumn : metaColumns) {
                line.append("|").append(StringUtils.trimToEmpty(rawDataMap.get(metaColumn)));
            }
        }

        line.append("\n");
        scoreWriter.write(line.toString());
    }

    boolean allStreamsSeen = receivedStreamCnt == msg.getTotalStreamCnt();
    if (allStreamsSeen && hasAllMessageResult(resultMap)) {
        log.info("Finish running scoring, the score file - {} is stored in {}.",
                new PathFinder(modelConfig).getEvalScorePath(evalConfig).toString(),
                evalConfig.getDataSet().getSource().name());
        scoreWriter.close();
        // only one message will be sent
        nextActorRef.tell(new EvalResultMessage(1), this.getSelf());
    }
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class ModelSpecLoaderUtils method getSubModelsCnt.
/**
 * Counts the model spec files found in each sub-directory of the models path.
 * <p>
 * The models path is taken from {@code evalConfig} when it defines a non-empty one, otherwise
 * from the {@link PathFinder} of the model config. Directories without spec files are left out
 * of the result. An {@link IOException} while listing is logged and whatever was collected so
 * far is returned.
 *
 * @param modelConfig
 *            model config
 * @param columnConfigList
 *            list of {@link ColumnConfig}; not used by this method
 * @param evalConfig
 *            eval configuration, may be {@code null}
 * @param sourceType
 *            {@link SourceType} LOCAL or HDFS?
 * @return sub-model directory name mapped to its number of spec files, sorted by name
 */
@SuppressWarnings("deprecation")
public static Map<String, Integer> getSubModelsCnt(ModelConfig modelConfig, List<ColumnConfig> columnConfigList, EvalConfig evalConfig, RawSourceData.SourceType sourceType) {
    FileSystem fs = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    PathFinder pathFinder = new PathFinder(modelConfig);

    boolean useDefaultPath = evalConfig == null || StringUtils.isEmpty(evalConfig.getModelsPath());
    String modelsPath = useDefaultPath ? pathFinder.getModelsPath(sourceType) : evalConfig.getModelsPath();

    Map<String, Integer> countsByDir = new TreeMap<String, Integer>();
    try {
        for (FileStatus candidate : fs.listStatus(new Path(modelsPath))) {
            // Only directories can hold sub-models.
            if (!candidate.isDir()) {
                continue;
            }
            List<FileStatus> specFiles = new ArrayList<FileStatus>();
            getModelsAlgAndSpecFiles(candidate, sourceType, specFiles, new FileStatus[2]);
            if (CollectionUtils.isNotEmpty(specFiles)) {
                countsByDir.put(candidate.getPath().getName(), specFiles.size());
            }
        }
    } catch (IOException e) {
        log.error("Error occurred when finnding sub-models.", e);
    }
    return countsByDir;
}
Aggregations