Search in sources :

Example 16 with NSColumn

use of ml.shifu.shifu.column.NSColumn in project shifu by ShifuML.

In class CommonUtils, method updateColumnConfigFlags:

/**
 * Update target, weight, meta, force-select and force-remove flags, and the column type, of every column
 * config from the settings in the model config.
 *
 * <p>
 * For each column the flag is first cleared, then set to Weight, Target, Meta, ForceRemove or ForceSelect
 * (checked in that order, first match wins); a column matching none of them keeps a {@code null} flag.
 * finalSelect is reset to {@code false} for Weight, Target, Meta and ForceRemove columns. The column type is
 * set to N for the weight column, C for the target column, H (together with its hybrid threshold) for hybrid
 * columns, C for categorical columns and N for everything else.
 *
 * @param modelConfig
 *            the model config
 * @param columnConfigList
 *            the column config list
 * @throws IOException
 *             any io exception
 *
 * @throws IllegalArgumentException
 *             if modelConfig is null or columnConfigList is null.
 */
public static void updateColumnConfigFlags(ModelConfig modelConfig, List<ColumnConfig> columnConfigList) throws IOException {
    // enforce the documented contract instead of failing later with a NullPointerException
    if (modelConfig == null || columnConfigList == null) {
        throw new IllegalArgumentException("modelConfig and columnConfigList must not be null.");
    }

    String targetColumnName = modelConfig.getTargetColumnName();
    String weightColumnName = modelConfig.getWeightColumnName();

    Set<NSColumn> setCategorialColumns = toNsColumnSet(modelConfig.getCategoricalColumnNames());

    Map<String, Double> hybridColumnNames = modelConfig.getHybridColumnNames();
    Set<NSColumn> setHybridColumns = toNsColumnSet(hybridColumnNames == null ? null : hybridColumnNames.keySet());

    // each name getter is called only once; the original called them twice (emptiness check + loop)
    Set<NSColumn> setMeta = toNsColumnSet(modelConfig.getMetaColumnNames());

    // force-remove / force-select lists are only honoured when the force switch is explicitly enabled
    boolean forceEnabled = Boolean.TRUE.equals(modelConfig.getVarSelect().getForceEnable());
    Set<NSColumn> setForceRemove = toNsColumnSet(forceEnabled ? modelConfig.getListForceRemove() : null);
    Set<NSColumn> setForceSelect = toNsColumnSet(forceEnabled ? modelConfig.getListForceSelect() : null);

    for (ColumnConfig config : columnConfigList) {
        String varName = config.getColumnName();
        // reset it, so a flag from an earlier configuration does not survive
        config.setColumnFlag(null);

        if (NSColumnUtils.isColumnEqual(weightColumnName, varName)) {
            config.setColumnFlag(ColumnFlag.Weight);
            // reset final select
            config.setFinalSelect(false);
            // weight column is numerical
            config.setColumnType(ColumnType.N);
            continue;
        }

        if (NSColumnUtils.isColumnEqual(targetColumnName, varName)) {
            config.setColumnFlag(ColumnFlag.Target);
            // reset final select
            config.setFinalSelect(false);
            // target column is set to categorical column
            config.setColumnType(ColumnType.C);
            continue;
        }

        // one NSColumn per column is enough for all the set look-ups below
        NSColumn nsColumn = new NSColumn(varName);

        if (setMeta.contains(nsColumn)) {
            config.setColumnFlag(ColumnFlag.Meta);
            // reset final select
            config.setFinalSelect(false);
        } else if (setForceRemove.contains(nsColumn)) {
            config.setColumnFlag(ColumnFlag.ForceRemove);
            // reset final select
            config.setFinalSelect(false);
        } else if (setForceSelect.contains(nsColumn)) {
            config.setColumnFlag(ColumnFlag.ForceSelect);
        }

        if (setHybridColumns.contains(nsColumn)) {
            config.setColumnType(ColumnType.H);
            // the threshold map is looked up by full name in strict namespace mode, by simple name otherwise
            String thresholdKey;
            if (Environment.getBoolean(Constants.SHIFU_NAMESPACE_STRICT_MODE, false)) {
                thresholdKey = nsColumn.getFullColumnName();
            } else {
                thresholdKey = nsColumn.getSimpleName();
            }
            config.setHybridThreshold(hybridColumnNames.get(thresholdKey));
        } else if (setCategorialColumns.contains(nsColumn)) {
            config.setColumnType(ColumnType.C);
        } else {
            config.setColumnType(ColumnType.N);
        }
    }
}

/**
 * Wraps every name in an {@link NSColumn} and collects the results in a new set.
 *
 * @param names
 *            the column names, may be null
 * @return a new mutable set; empty when names is null or has no elements
 */
private static Set<NSColumn> toNsColumnSet(Iterable<String> names) {
    Set<NSColumn> columns = new HashSet<NSColumn>();
    if (names != null) {
        for (String name : names) {
            columns.add(new NSColumn(name));
        }
    }
    return columns;
}
Also used : ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) NSColumn(ml.shifu.shifu.column.NSColumn)

Example 17 with NSColumn

use of ml.shifu.shifu.column.NSColumn in project shifu by ShifuML.

In class FullScoreUDF, method exec:

/**
 * Scores one input record and returns the scores followed by the record's meta column values.
 *
 * <p>
 * The result tuple holds, in order: the average score, the max score, the min score, every individual score
 * from the result, and then the raw input value of each meta column named by the model config.
 *
 * @param input
 *            the raw record; it is converted to a column-to-value map using {@code this.header}
 * @return the result tuple, or {@code null} (after logging an error) when the model runner yields no result
 * @throws IOException
 *             propagated from the calls made in the body
 */
public Tuple exec(Tuple input) throws IOException {
    Map<NSColumn, String> rawDataNsMap = CommonUtils.convertDataIntoNsMap(input, this.header, 0);
    CaseScoreResult cs = modelRunner.computeNsData(rawDataNsMap);
    if (cs == null) {
        log.error("Get null result.");
        return null;
    }

    Tuple tuple = TupleFactory.getInstance().newTuple();
    tuple.append(cs.getAvgScore());
    tuple.append(cs.getMaxScore());
    tuple.append(cs.getMinScore());
    for (double score : cs.getScores()) {
        tuple.append(score);
    }

    // the meta column list is treated as optional elsewhere in the project, so guard against null here too
    List<String> metaList = modelConfig.getMetaColumnNames();
    if (metaList != null) {
        for (String meta : metaList) {
            // a meta column missing from the input map appends null
            tuple.append(rawDataNsMap.get(new NSColumn(meta)));
        }
    }
    return tuple;
}
Also used : CaseScoreResult(ml.shifu.shifu.container.CaseScoreResult) Tuple(org.apache.pig.data.Tuple) NSColumn(ml.shifu.shifu.column.NSColumn)

Aggregations

NSColumn (ml.shifu.shifu.column.NSColumn): 17, ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig): 8, CaseScoreResult (ml.shifu.shifu.container.CaseScoreResult): 4, Tuple (org.apache.pig.data.Tuple): 4, BasicML (org.encog.ml.BasicML): 3, ModelRunner (ml.shifu.shifu.core.ModelRunner): 2, ModelSpec (ml.shifu.shifu.core.model.ModelSpec): 2, BasicMLData (org.encog.ml.data.basic.BasicMLData): 2, BasicMLDataPair (org.encog.ml.data.basic.BasicMLDataPair): 2, IOException (java.io.IOException): 1, HashSet (java.util.HashSet): 1, Map (java.util.Map): 1, Entry (java.util.Map.Entry): 1, SortedMap (java.util.SortedMap): 1, ColumnConfigComparator (ml.shifu.shifu.container.obj.ColumnConfig.ColumnConfigComparator): 1, NormType (ml.shifu.shifu.container.obj.ModelNormalizeConf.NormType): 1, ShifuException (ml.shifu.shifu.exception.ShifuException): 1, FileStatus (org.apache.hadoop.fs.FileStatus): 1, Path (org.apache.hadoop.fs.Path): 1