Search in sources :

Example 6 with CaseScoreResult

use of ml.shifu.shifu.container.CaseScoreResult in project shifu by ShifuML.

the class DataPrepareWorker method convertModelResultIntoColScore.

/**
 * Regroups per-record model scores into per-column score lists.
 *
 * For every scored record, the raw input line is split into a (column-name, value) map; then, for each
 * final-selected column, a {@link ColumnScoreObject} carrying that column's raw value together with the
 * record's scores (all / max / min / avg) is appended to the list registered under the column number.
 *
 * @param scoreResultList
 *            scored records to convert
 * @param columnScoreListMap
 *            (column-id, List<ColumnScoreObject>) - output map, updated in place; a list is created on first
 *            use of a column id
 */
private void convertModelResultIntoColScore(List<CaseScoreResult> scoreResultList, Map<Integer, List<ColumnScoreObject>> columnScoreListMap) {
    for (CaseScoreResult caseResult : scoreResultList) {
        // column name -> raw value for this record
        Map<String, String> fieldValues = CommonUtils.convertDataIntoMap(caseResult.getInputData(), super.modelConfig.getDataSetDelimiter(), this.trainDataHeader);
        for (ColumnConfig column : columnConfigList) {
            if (!column.isFinalSelect()) {
                // only final-selected columns are tracked
                continue;
            }

            ColumnScoreObject entry = new ColumnScoreObject(column.getColumnNum(), fieldValues.get(column.getColumnName()));
            entry.setScores(caseResult.getScores());
            entry.setMaxScore(caseResult.getMaxScore());
            entry.setMinScore(caseResult.getMinScore());
            entry.setAvgScore(caseResult.getAvgScore());

            List<ColumnScoreObject> bucket = columnScoreListMap.get(column.getColumnNum());
            if (bucket == null) {
                bucket = new ArrayList<ColumnScoreObject>();
                columnScoreListMap.put(column.getColumnNum(), bucket);
            }
            bucket.add(entry);
        }
    }
}
Also used : CaseScoreResult(ml.shifu.shifu.container.CaseScoreResult) ColumnScoreObject(ml.shifu.shifu.container.ColumnScoreObject) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig)

Example 7 with CaseScoreResult

use of ml.shifu.shifu.container.CaseScoreResult in project shifu by ShifuML.

the class ModelRunner method computeNsData.

/**
 * Run model to compute score for input NS Data map
 *
 * The main scorer (if any) fills the top-level scores of the result. Each sub scorer (if any) is then run
 * against the same input and its non-null result is attached under the sub model name. Sub scorers run on
 * a lazily created thread pool when multi-threading is enabled and there is more than one of them;
 * otherwise they run sequentially on the calling thread.
 *
 * @param rawDataNsMap
 *            - the original input, but key is wrapped by NSColumn
 * @return CaseScoreResult - model score; {@code null} when the input map is empty or when the main scorer
 *         returns no score
 */
public CaseScoreResult computeNsData(final Map<NSColumn, String> rawDataNsMap) {
    if (MapUtils.isEmpty(rawDataNsMap)) {
        return null;
    }
    CaseScoreResult scoreResult = new CaseScoreResult();
    if (this.scorer != null) {
        ScoreObject so = scorer.scoreNsData(rawDataNsMap);
        if (so == null) {
            return null;
        }
        scoreResult.setScores(so.getScores());
        scoreResult.setMaxScore(so.getMaxScore());
        scoreResult.setMinScore(so.getMinScore());
        scoreResult.setAvgScore(so.getMeanScore());
        scoreResult.setMedianScore(so.getMedianScore());
        scoreResult.setHiddenLayerScores(so.getHiddenLayerScores());
    }
    if (MapUtils.isNotEmpty(this.subScorers)) {
        // a pool is only worth it when there are at least two sub models to score
        final boolean runInParallel = this.isMultiThread && this.subScorers.size() > 1;
        if (runInParallel && this.executorManager == null) {
            int threadPoolSize = Math.min(Runtime.getRuntime().availableProcessors(), this.subScorers.size());
            this.executorManager = new ExecutorManager<Pair<String, ScoreObject>>(threadPoolSize);
            // add a shutdown hook as a safe guard if some one not call close
            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {

                @Override
                public void run() {
                    // NOTE(review): guard in case the field was cleared before JVM shutdown (e.g. by a
                    // close method not visible here) - confirm
                    if (ModelRunner.this.executorManager != null) {
                        ModelRunner.this.executorManager.forceShutDown();
                    }
                }
            }));
            log.info("MultiThread is enabled in ModelRunner, threadPoolSize = " + threadPoolSize);
        }
        List<Callable<Pair<String, ScoreObject>>> tasks = new ArrayList<Callable<Pair<String, ScoreObject>>>(this.subScorers.size());
        for (final Map.Entry<String, Scorer> entry : this.subScorers.entrySet()) {
            tasks.add(new Callable<Pair<String, ScoreObject>>() {

                @Override
                public Pair<String, ScoreObject> call() {
                    String modelName = entry.getKey();
                    Scorer subScorer = entry.getValue();
                    ScoreObject so = subScorer.scoreNsData(rawDataNsMap);
                    if (so != null) {
                        return Pair.of(modelName, so);
                    } else {
                        // null tells the collector below to skip this sub model
                        return null;
                    }
                }
            });
        }
        if (runInParallel) {
            List<Pair<String, ScoreObject>> results = this.executorManager.submitTasksAndWaitResults(tasks);
            for (Pair<String, ScoreObject> result : results) {
                if (result != null) {
                    scoreResult.addSubModelScore(result.getLeft(), result.getRight());
                }
            }
        } else {
            for (Callable<Pair<String, ScoreObject>> task : tasks) {
                Pair<String, ScoreObject> result = null;
                try {
                    result = task.call();
                } catch (Exception e) {
                    // best effort: a failing sub model is skipped, but the failure is no longer silent
                    log.error("Fail to compute score for sub model, its score is skipped.", e);
                }
                if (result != null) {
                    scoreResult.addSubModelScore(result.getLeft(), result.getRight());
                }
            }
        }
    }
    return scoreResult;
}
Also used : ScoreObject(ml.shifu.shifu.container.ScoreObject) ArrayList(java.util.ArrayList) Callable(java.util.concurrent.Callable) CaseScoreResult(ml.shifu.shifu.container.CaseScoreResult) TreeMap(java.util.TreeMap) Map(java.util.Map) Pair(org.apache.commons.lang3.tuple.Pair)

Example 8 with CaseScoreResult

use of ml.shifu.shifu.container.CaseScoreResult in project shifu by ShifuML.

the class EvalNormUDF method exec.

/**
 * Builds one normalized output tuple for an input record, optionally followed by a model score.
 *
 * Output layout follows {@code outputNames}: index 0 is copied as-is; index 1 is copied, with "1" substituted
 * when the raw value is empty; indexes in [2, 2 + validMetaSize) are meta columns copied as-is; every
 * remaining column is normalized (preceded by its raw value when {@code isOutputRaw} is set). When
 * {@code isAppendScore} is set, one score value is appended at the end: -999.0 if no model is loaded or
 * scoring yields no result, the average score if {@code scIndex} is negative, otherwise the score at
 * {@code scIndex}.
 *
 * @param input
 *            the raw record
 * @return the output tuple, or {@code null} when the record is the CSV header line or converts to an empty
 *         data map
 * @throws IOException
 *             declared by the method signature; presumably raised when loading models or reading the
 *             tuple fails
 */
public Tuple exec(Tuple input) throws IOException {
    if (isCsvFormat) {
        String firstCol = ((input.get(0) == null) ? "" : input.get(0).toString());
        if (this.headers[0].equals(CommonUtils.normColumnName(firstCol))) {
            // TODO what to do if the column value == column name? ...
            // treated as the header line and dropped
            return null;
        }
    }
    if (this.modelRunner == null && this.isAppendScore) {
        // here to initialize modelRunner, this is moved from constructor to here to avoid OOM in client side.
        // UDF in pig client will be initialized to get some metadata issues
        @SuppressWarnings("deprecation") List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelConfig, evalConfig, evalConfig.getDataSet().getSource(), evalConfig.getGbtConvertToProb(), evalConfig.getGbtScoreConvertStrategy());
        this.modelRunner = new ModelRunner(modelConfig, columnConfigList, this.headers, evalConfig.getDataSet().getDataDelimiter(), models);
        this.modelRunner.setScoreScale(Integer.parseInt(this.scale));
    }
    Map<NSColumn, String> rawDataNsMap = CommonUtils.convertDataIntoNsMap(input, this.headers, this.segFilterSize);
    if (MapUtils.isEmpty(rawDataNsMap)) {
        return null;
    }
    Tuple tuple = TupleFactory.getInstance().newTuple();
    for (int i = 0; i < this.outputNames.size(); i++) {
        String name = this.outputNames.get(i);
        String raw = rawDataNsMap.get(new NSColumn(name));
        if (i == 0) {
            tuple.append(raw);
        } else if (i == 1) {
            // an empty value at index 1 falls back to "1"
            tuple.append(StringUtils.isEmpty(raw) ? "1" : raw);
        } else if (i < 2 + validMetaSize) {
            // [2, 2 + validMetaSize) are meta columns
            tuple.append(raw);
        } else {
            ColumnConfig columnConfig = this.columnConfigMap.get(name);
            List<Double> normVals = Normalizer.normalize(columnConfig, raw, this.modelConfig.getNormalizeStdDevCutOff(), this.modelConfig.getNormalizeType());
            if (this.isOutputRaw) {
                tuple.append(raw);
            }
            for (Double normVal : normVals) {
                tuple.append(getOutputValue(normVal, true));
            }
        }
    }
    if (this.isAppendScore && this.modelRunner != null) {
        CaseScoreResult score = this.modelRunner.computeNsData(rawDataNsMap);
        // modelRunner is known to be non-null here, so only the model count and the result need checking
        if (this.modelRunner.getModelsCnt() == 0 || score == null) {
            // sentinel written when no score is available
            tuple.append(-999.0);
        } else if (this.scIndex < 0) {
            tuple.append(score.getAvgScore());
        } else {
            tuple.append(score.getScores().get(this.scIndex));
        }
    }
    return tuple;
}
Also used : ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) BasicML(org.encog.ml.BasicML) CaseScoreResult(ml.shifu.shifu.container.CaseScoreResult) Tuple(org.apache.pig.data.Tuple) ModelRunner(ml.shifu.shifu.core.ModelRunner) NSColumn(ml.shifu.shifu.column.NSColumn)

Example 9 with CaseScoreResult

use of ml.shifu.shifu.container.CaseScoreResult in project shifu by ShifuML.

the class FullScoreUDF method exec.

/**
 * Scores one input record and emits (avg, max, min, each individual score..., each meta column value...).
 *
 * @param input
 *            the raw record
 * @return the score tuple, or {@code null} (after logging an error) when the model runner yields no result
 * @throws IOException
 *             declared by the method signature
 */
public Tuple exec(Tuple input) throws IOException {
    Map<NSColumn, String> nsData = CommonUtils.convertDataIntoNsMap(input, this.header, 0);
    CaseScoreResult scored = modelRunner.computeNsData(nsData);
    if (scored == null) {
        log.error("Get null result.");
        return null;
    }

    Tuple output = TupleFactory.getInstance().newTuple();

    // summary scores first
    output.append(scored.getAvgScore());
    output.append(scored.getMaxScore());
    output.append(scored.getMinScore());

    // then every individual score
    for (double singleScore : scored.getScores()) {
        output.append(singleScore);
    }

    // finally the raw values of the configured meta columns
    List<String> metaNames = modelConfig.getMetaColumnNames();
    for (String metaName : metaNames) {
        output.append(nsData.get(new NSColumn(metaName)));
    }
    return output;
}
Also used : CaseScoreResult(ml.shifu.shifu.container.CaseScoreResult) Tuple(org.apache.pig.data.Tuple) NSColumn(ml.shifu.shifu.column.NSColumn)

Aggregations

CaseScoreResult (ml.shifu.shifu.container.CaseScoreResult): 9; NSColumn (ml.shifu.shifu.column.NSColumn): 4; ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig): 3; Tuple (org.apache.pig.data.Tuple): 3; ArrayList (java.util.ArrayList): 2; Map (java.util.Map): 2; Entry (java.util.Map.Entry): 2; ModelRunner (ml.shifu.shifu.core.ModelRunner): 2; RunModelResultMessage (ml.shifu.shifu.message.RunModelResultMessage): 2; BasicML (org.encog.ml.BasicML): 2; SortedMap (java.util.SortedMap): 1; TreeMap (java.util.TreeMap): 1; Callable (java.util.concurrent.Callable): 1; ColumnScoreObject (ml.shifu.shifu.container.ColumnScoreObject): 1; ScoreObject (ml.shifu.shifu.container.ScoreObject): 1; ModelSpec (ml.shifu.shifu.core.model.ModelSpec): 1; BinStats (ml.shifu.shifu.core.posttrain.FeatureStatsWritable.BinStats): 1; PathFinder (ml.shifu.shifu.fs.PathFinder): 1; EvalResultMessage (ml.shifu.shifu.message.EvalResultMessage): 1; RunModelDataMessage (ml.shifu.shifu.message.RunModelDataMessage): 1