Use of ml.shifu.shifu.column.NSColumn in the shifu project by ShifuML.
Class CommonUtils, method updateColumnConfigFlags.
/**
 * Refresh the column flag and the column type of every column config from the settings in
 * {@code modelConfig}.
 *
 * <p>
 * For each column the flag is first cleared and then assigned by the first matching rule, in this
 * order: weight column, target column, meta column, force-remove column, force-select column. Every
 * rule except force-select also resets the column's final-select marker to {@code false}. The
 * force-remove and force-select lists are only consulted when force is enabled in the variable
 * selection settings.
 *
 * <p>
 * The column type is assigned by the first matching rule, in this order: weight column is numerical,
 * target column is categorical, hybrid columns are hybrid (and receive their configured threshold),
 * configured categorical columns are categorical, everything else is numerical.
 *
 * @param modelConfig
 *            the model config that names the target, weight, categorical, hybrid, meta and forced
 *            columns
 * @param columnConfigList
 *            the column config list, updated in place
 * @throws IOException
 *             any io exception
 *
 * @throws IllegalArgumentException
 *             if modelConfig is null or columnConfigList is null.
 */
public static void updateColumnConfigFlags(ModelConfig modelConfig, List<ColumnConfig> columnConfigList)
        throws IOException {
    // Enforce the documented contract up front instead of failing later with a NullPointerException.
    if (modelConfig == null || columnConfigList == null) {
        throw new IllegalArgumentException("modelConfig and columnConfigList must not be null.");
    }

    String targetColumnName = modelConfig.getTargetColumnName();
    String weightColumnName = modelConfig.getWeightColumnName();

    Set<NSColumn> categoricalColumns = toNSColumnSet(modelConfig.getCategoricalColumnNames());

    Map<String, Double> hybridColumnNames = modelConfig.getHybridColumnNames();
    Set<NSColumn> hybridColumns = toNSColumnSet(hybridColumnNames == null ? null : hybridColumnNames.keySet());

    Set<NSColumn> metaColumns = toNSColumnSet(modelConfig.getMetaColumnNames());

    // The force lists are only read when force is enabled; otherwise both sets stay empty.
    boolean forceEnabled = Boolean.TRUE.equals(modelConfig.getVarSelect().getForceEnable());
    Set<NSColumn> forceRemoveColumns = toNSColumnSet(forceEnabled ? modelConfig.getListForceRemove() : null);
    Set<NSColumn> forceSelectColumns = toNSColumnSet(forceEnabled ? modelConfig.getListForceSelect() : null);

    for (ColumnConfig config : columnConfigList) {
        String varName = config.getColumnName();

        // reset it
        config.setColumnFlag(null);

        if (NSColumnUtils.isColumnEqual(weightColumnName, varName)) {
            config.setColumnFlag(ColumnFlag.Weight);
            config.setFinalSelect(false);
            // weight column is numerical
            config.setColumnType(ColumnType.N);
            continue;
        }

        if (NSColumnUtils.isColumnEqual(targetColumnName, varName)) {
            config.setColumnFlag(ColumnFlag.Target);
            config.setFinalSelect(false);
            // target column is set to categorical column
            config.setColumnType(ColumnType.C);
            continue;
        }

        // Neither weight nor target: build the namespaced key once and reuse it for every lookup.
        NSColumn nsColumn = new NSColumn(varName);

        if (metaColumns.contains(nsColumn)) {
            config.setColumnFlag(ColumnFlag.Meta);
            config.setFinalSelect(false);
        } else if (forceRemoveColumns.contains(nsColumn)) {
            config.setColumnFlag(ColumnFlag.ForceRemove);
            config.setFinalSelect(false);
        } else if (forceSelectColumns.contains(nsColumn)) {
            // force-select deliberately leaves the final-select marker untouched
            config.setColumnFlag(ColumnFlag.ForceSelect);
        }

        if (hybridColumns.contains(nsColumn)) {
            config.setColumnType(ColumnType.H);
            // The threshold map is keyed by raw strings, so pick the name form matching the mode.
            String thresholdKey = Environment.getBoolean(Constants.SHIFU_NAMESPACE_STRICT_MODE, false)
                    ? nsColumn.getFullColumnName()
                    : nsColumn.getSimpleName();
            // NOTE(review): membership above is tested through NSColumn equality while this lookup
            // uses a raw string key; if the configured key is written in the other name form the
            // lookup presumably yields null - confirm against how hybrid names are configured.
            config.setHybridThreshold(hybridColumnNames.get(thresholdKey));
        } else if (categoricalColumns.contains(nsColumn)) {
            config.setColumnType(ColumnType.C);
        } else {
            config.setColumnType(ColumnType.N);
        }
    }
}

/**
 * Wrap every given column name in an {@link NSColumn} and collect the results into a new set.
 *
 * @param columnNames
 *            the column names to wrap; may be null, which yields an empty set
 * @return a new mutable set with one {@link NSColumn} per distinct name
 */
private static Set<NSColumn> toNSColumnSet(Iterable<String> columnNames) {
    Set<NSColumn> columns = new HashSet<NSColumn>();
    if (columnNames != null) {
        for (String columnName : columnNames) {
            columns.add(new NSColumn(columnName));
        }
    }
    return columns;
}
Use of ml.shifu.shifu.column.NSColumn in the shifu project by ShifuML.
Class FullScoreUDF, method exec.
/**
 * Score one input record and return the scores followed by the record's meta column values.
 *
 * <p>
 * The returned tuple holds, in order: the average score, the maximum score, the minimum score, each
 * individual score, and then one raw value per configured meta column (looked up in the input
 * record by column name).
 *
 * @param input
 *            the input record, converted to a column-to-value map using this instance's header
 * @return the result tuple, or {@code null} when the model runner produces no score result
 * @throws IOException
 *             any io exception
 */
public Tuple exec(Tuple input) throws IOException {
    Map<NSColumn, String> rawDataNsMap = CommonUtils.convertDataIntoNsMap(input, this.header, 0);
    CaseScoreResult cs = modelRunner.computeNsData(rawDataNsMap);
    if (cs == null) {
        log.error("Get null result.");
        return null;
    }

    Tuple tuple = TupleFactory.getInstance().newTuple();
    tuple.append(cs.getAvgScore());
    tuple.append(cs.getMaxScore());
    tuple.append(cs.getMinScore());
    for (double score : cs.getScores()) {
        tuple.append(score);
    }

    // No meta columns configured may surface as a null list; skip instead of failing after scoring.
    List<String> metaList = modelConfig.getMetaColumnNames();
    if (metaList != null) {
        for (String meta : metaList) {
            tuple.append(rawDataNsMap.get(new NSColumn(meta)));
        }
    }
    return tuple;
}
Aggregations