Search in sources :

Example 1 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class PerformanceEvaluator method review.

/**
 * Reviews evaluation performance using the confusion matrix file resolved through {@link PathFinder}.
 *
 * The file is opened with {@code ShifuFileUtils.getReader}, wrapped in a {@code CMOIterable} and passed,
 * together with {@code records}, to the iterable-based {@code review} overload. The reader is always
 * closed quietly once that call finishes or fails.
 *
 * @param records
 *            record count forwarded to the iterable-based overload; zero is treated as "no result read"
 * @throws IOException
 *             if the confusion matrix file cannot be opened or read
 * @throws ShifuException
 *             with {@code ShifuErrorCode.ERROR_EVALCONFMTR} when {@code records} is zero
 */
public void review(long records) throws IOException {
    if (records == 0) {
        log.info("No result read, please check EvalConfusionMatrix file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALCONFMTR);
    }

    final PathFinder finder = new PathFinder(modelConfig);
    log.info("Loading confusion matrix in {}", finder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()));

    // Open the reader up front: if opening fails there is nothing to close.
    final BufferedReader matrixReader = ShifuFileUtils.getReader(finder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()), evalConfig.getDataSet().getSource());
    try {
        review(new CMOIterable(matrixReader), records);
    } finally {
        IOUtils.closeQuietly(matrixReader);
    }
}
Also used : BufferedReader(java.io.BufferedReader) PathFinder(ml.shifu.shifu.fs.PathFinder) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 2 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class PerformanceEvaluator method review.

/**
 * Loads every row of the confusion matrix file into memory and reviews the resulting list.
 *
 * Each line is split on {@code |} and its first nine fields are parsed as doubles in this order:
 * tp, fp, fn, tn, weighted tp, weighted fp, weighted fn, weighted tn, score. The collected
 * objects and the number of lines read are then passed to {@code review(matrixList, cnt)}.
 *
 * The reader is closed in a {@code finally} block so the underlying stream is released even when
 * a line fails to parse or the file turns out to be empty.
 *
 * @throws IOException
 *             if the confusion matrix file cannot be opened, read or closed
 * @throws ShifuException
 *             with {@code ShifuErrorCode.ERROR_EVALCONFMTR} when the file contains no lines
 */
public void review() throws IOException {
    PathFinder pathFinder = new PathFinder(modelConfig);
    log.info("Loading confusion matrix in {}", pathFinder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()));
    BufferedReader reader = ShifuFileUtils.getReader(pathFinder.getEvalMatrixPath(evalConfig, evalConfig.getDataSet().getSource()), evalConfig.getDataSet().getSource());
    List<ConfusionMatrixObject> matrixList = new ArrayList<ConfusionMatrixObject>();
    int cnt = 0;
    try {
        String line = null;
        while ((line = reader.readLine()) != null) {
            cnt++;
            // fields are pipe separated; the pipe must be escaped because split() takes a regex
            String[] raw = line.split("\\|");
            ConfusionMatrixObject matrix = new ConfusionMatrixObject();
            matrix.setTp(Double.parseDouble(raw[0]));
            matrix.setFp(Double.parseDouble(raw[1]));
            matrix.setFn(Double.parseDouble(raw[2]));
            matrix.setTn(Double.parseDouble(raw[3]));
            matrix.setWeightedTp(Double.parseDouble(raw[4]));
            matrix.setWeightedFp(Double.parseDouble(raw[5]));
            matrix.setWeightedFn(Double.parseDouble(raw[6]));
            matrix.setWeightedTn(Double.parseDouble(raw[7]));
            matrix.setScore(Double.parseDouble(raw[8]));
            matrixList.add(matrix);
        }
    } finally {
        // release the file handle on every path, including read and parse failures
        reader.close();
    }
    if (0 == cnt) {
        log.info("No result read, please check EvalConfusionMatrix file");
        throw new ShifuException(ShifuErrorCode.ERROR_EVALCONFMTR);
    }
    review(matrixList, cnt);
}
Also used : BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) PathFinder(ml.shifu.shifu.fs.PathFinder) ConfusionMatrixObject(ml.shifu.shifu.container.ConfusionMatrixObject) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 3 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class BasicModelProcessor method setUp.

/**
 * Prepares the processor for a step: loads and validates the model config, creates the
 * pathFinder and, for every step other than INIT, loads, updates and saves the column config.
 *
 * Does nothing when {@code hasInitialized()} already reports true.
 *
 * @param step
 *            Shifu running step
 * @throws Exception
 *             any exception in setup
 * @throws IllegalArgumentException
 *             if segment filter expressions are configured for a NORMALIZE, VARSELECT, TRAIN or EVAL
 *             step while the algorithm is neither NN nor LR
 */
@SuppressWarnings("incomplete-switch")
protected void setUp(ModelStep step) throws Exception {
    if (hasInitialized()) {
        return;
    }

    // model configuration first: everything below depends on it
    loadModelConfig();
    validateModelConfig(step);
    this.pathFinder = new PathFinder(modelConfig, this.getOtherConfigs());
    checkAlgorithmParam();
    LOG.info(String.format("Training Data Soure Location: %s", modelConfig.getDataSet().getSource()));

    switch(step) {
        case INIT:
            // no column config handling in INIT
            break;
        default:
            loadColumnConfig();
            validateColumnConfig();
            // Only a STATS run with the PSI or rebin option sets this flag; those sub-steps are expected
            // to run after a plain 'shifu stats' (per the original note this is meant to select the
            // VoidUpdater). The correlation option is intentionally not part of the condition.
            boolean forceVoidUpdate = false;
            if (step == ModelStep.STATS) {
                forceVoidUpdate = getBooleanParam(this.params, Constants.IS_COMPUTE_PSI) || getBooleanParam(this.params, Constants.IS_REBIN);
            }
            // refresh the ColumnConfig flags, re-validate and persist to disk
            ColumnConfigUpdater.updateColumnConfigFlags(modelConfig, columnConfigList, step, forceVoidUpdate);
            validateColumnConfigAfterSet();
            saveColumnConfigList();
            break;
    }

    // segment expressions are only accepted together with NN or LR
    switch(step) {
        case NORMALIZE:
        case VARSELECT:
        case TRAIN:
        case EVAL:
            List<String> segmentExpressions = this.modelConfig.getSegmentFilterExpressions();
            String algorithm = this.modelConfig.getAlgorithm();
            if (!segmentExpressions.isEmpty() && !CommonUtils.isNNModel(algorithm) && !CommonUtils.isLRModel(algorithm)) {
                throw new IllegalArgumentException("Segment expression is only supported in NN or LR model, please check train:algrithm setting in ModelConfig.json.");
            }
            break;
        default:
            break;
    }
}
Also used : PathFinder(ml.shifu.shifu.fs.PathFinder)

Example 4 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class ScoreModelWorker method handleMsg.

/*
 * Handles a RunModelResultMessage by writing one pipe-delimited line per scored case to
 * scoreWriter: tag, weight ("1.0" when no weight column is configured), model scores,
 * sub-model scores and meta column values. Once every stream has delivered all of its
 * messages the writer is closed and a single EvalResultMessage is sent to the next actor.
 * Any other message type is passed to unhandled().
 *
 * (non-Javadoc)
 *
 * @see akka.actor.UntypedActor#onReceive(java.lang.Object)
 */
@Override
public void handleMsg(Object message) throws IOException {
    if (!(message instanceof RunModelResultMessage)) {
        unhandled(message);
        return;
    }

    log.debug("Received model score data for evaluation");
    RunModelResultMessage resultMsg = (RunModelResultMessage) message;

    // first message of a stream: start tracking it
    if (!resultMap.containsKey(resultMsg.getStreamId())) {
        receivedStreamCnt++;
        resultMap.put(resultMsg.getStreamId(), new StreamBulletin(resultMsg.getStreamId()));
    }
    resultMap.get(resultMsg.getStreamId()).receiveMsge(resultMsg.getMsgId(), resultMsg.isLastMsg());

    List<CaseScoreResult> scoredCases = resultMsg.getScoreResultList();
    // one builder reused for every output line
    StringBuilder row = new StringBuilder();
    for (CaseScoreResult scored : scoredCases) {
        row.setLength(0);
        Map<String, String> fields = CommonUtils.convertDataIntoMap(scored.getInputData(), evalConfig.getDataSet().getDataDelimiter(), header);

        // tag column
        String tag = CommonUtils.trimTag(fields.get(modelConfig.getTargetColumnName(evalConfig)));
        row.append(tag);

        // weight column, defaulting to 1.0 when none is configured
        if (StringUtils.isNotBlank(evalConfig.getDataSet().getWeightColumnName())) {
            String weight = fields.get(evalConfig.getDataSet().getWeightColumnName());
            row.append("|").append(StringUtils.trimToEmpty(weight));
        } else {
            row.append("|1.0");
        }

        // scores of the main models
        if (CollectionUtils.isNotEmpty(scored.getScores())) {
            addModelScoreData(row, scored);
        }

        // scores of every sub model, in the map's iteration order
        Map<String, CaseScoreResult> subModelScores = scored.getSubModelScores();
        if (MapUtils.isNotEmpty(subModelScores)) {
            for (CaseScoreResult subScored : subModelScores.values()) {
                addModelScoreData(row, subScored);
            }
        }

        // meta columns
        List<String> metaColumns = evalConfig.getAllMetaColumns(modelConfig);
        if (CollectionUtils.isNotEmpty(metaColumns)) {
            for (String metaColumn : metaColumns) {
                row.append("|").append(StringUtils.trimToEmpty(fields.get(metaColumn)));
            }
        }

        row.append("\n");
        scoreWriter.write(row.toString());
    }

    if (receivedStreamCnt == resultMsg.getTotalStreamCnt() && hasAllMessageResult(resultMap)) {
        log.info("Finish running scoring, the score file - {} is stored in {}.", new PathFinder(modelConfig).getEvalScorePath(evalConfig).toString(), evalConfig.getDataSet().getSource().name());
        scoreWriter.close();
        // only one message will be sent
        nextActorRef.tell(new EvalResultMessage(1), this.getSelf());
    }
}
Also used : PathFinder(ml.shifu.shifu.fs.PathFinder) CaseScoreResult(ml.shifu.shifu.container.CaseScoreResult) EvalResultMessage(ml.shifu.shifu.message.EvalResultMessage) Entry(java.util.Map.Entry) RunModelResultMessage(ml.shifu.shifu.message.RunModelResultMessage)

Example 5 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class ModelSpecLoaderUtils method getSubModelsCnt.

/**
 * Counts the model spec files found in each sub-model directory under the models path.
 *
 * The models path comes from {@code evalConfig} when it defines a non-empty one, otherwise from
 * {@link PathFinder}. Only directories that yield at least one spec file are reported. If listing
 * fails with an {@link IOException} the error is logged and whatever was collected so far is
 * returned.
 *
 * @param modelConfig
 *            model config
 * @param columnConfigList
 *            list of {@link ColumnConfig}
 * @param evalConfig
 *            eval configuration, may be null
 * @param sourceType
 *            {@link SourceType} LOCAL or HDFS?
 * @return map from sub-model directory name to its number of model spec files, sorted by name
 */
@SuppressWarnings("deprecation")
public static Map<String, Integer> getSubModelsCnt(ModelConfig modelConfig, List<ColumnConfig> columnConfigList, EvalConfig evalConfig, RawSourceData.SourceType sourceType) {
    FileSystem fileSystem = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    PathFinder finder = new PathFinder(modelConfig);

    // an explicit models path in the eval config wins over the default location
    String modelsPath = (evalConfig == null || StringUtils.isEmpty(evalConfig.getModelsPath()))
            ? finder.getModelsPath(sourceType)
            : evalConfig.getModelsPath();

    Map<String, Integer> countsByName = new TreeMap<String, Integer>();
    try {
        for (FileStatus entry : fileSystem.listStatus(new Path(modelsPath))) {
            if (!entry.isDir()) {
                // plain files directly under the models path are not sub models
                continue;
            }
            List<FileStatus> specFiles = new ArrayList<FileStatus>();
            getModelsAlgAndSpecFiles(entry, sourceType, specFiles, new FileStatus[2]);
            if (CollectionUtils.isNotEmpty(specFiles)) {
                countsByName.put(entry.getPath().getName(), specFiles.size());
            }
        }
    } catch (IOException e) {
        log.error("Error occurred when finnding sub-models.", e);
    }
    return countsByName;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) PathFinder(ml.shifu.shifu.fs.PathFinder)

Aggregations

PathFinder (ml.shifu.shifu.fs.PathFinder)20 Path (org.apache.hadoop.fs.Path)7 FileSystem (org.apache.hadoop.fs.FileSystem)6 File (java.io.File)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig)3 ShifuException (ml.shifu.shifu.exception.ShifuException)3 JsonIgnore (com.fasterxml.jackson.annotation.JsonIgnore)2 BufferedReader (java.io.BufferedReader)2 ArrayList (java.util.ArrayList)2 Scanner (java.util.Scanner)2 ModelConfig (ml.shifu.shifu.container.obj.ModelConfig)2 AkkaActorInputMessage (ml.shifu.shifu.message.AkkaActorInputMessage)2 ExceptionMessage (ml.shifu.shifu.message.ExceptionMessage)2 StatsResultMessage (ml.shifu.shifu.message.StatsResultMessage)2 BufferedWriter (java.io.BufferedWriter)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 OutputStreamWriter (java.io.OutputStreamWriter)1 Writer (java.io.Writer)1