Use of org.encog.ml.BasicML in project shifu by ShifuML.
Class NNOutput, method writeBinaryModelWeightsToFileSystem.
/**
 * Applies the given weights to the in-memory network and serializes it in binary form to {@code out}.
 *
 * <p>
 * The target file system is obtained from a freshly created {@link Configuration}. An {@link IOException}
 * raised while resolving the file system or saving the model is logged and not propagated to the caller.
 *
 * @param weights
 *            the weights to set on the network's flat representation before saving
 * @param out
 *            the destination path of the binary model
 */
private void writeBinaryModelWeightsToFileSystem(double[] weights, Path out) {
    LOG.info("Writing NN models to {}.", out);
    this.network.getFlat().setWeights(weights);

    // Typed as BasicML so that Arrays.asList yields a List<BasicML> for the serializer.
    final BasicML model = this.network;
    try {
        final FileSystem fs = FileSystem.get(new Configuration());
        BinaryNNSerializer.save(modelConfig, columnConfigList, Arrays.asList(model), fs, out);
    } catch (IOException e) {
        // Failure is reported through the log only; callers are not notified.
        LOG.error("Error in writing model", e);
    }
}
Use of org.encog.ml.BasicML in project shifu by ShifuML.
Class EvalScoreUDF, method exec.
/**
 * Scores one input record and returns the result as a tuple.
 *
 * <p>
 * The output tuple is built in this order: the trimmed target tag, the weight (the weight column value, or
 * "1.0" when no weight column is configured), the model scores (and sub-model scores, if any), and finally the
 * values of all meta columns.
 *
 * <p>
 * The model runner is created lazily on the first call rather than in the constructor.
 *
 * @param input
 *            the raw input record
 * @return the scored tuple, or {@code null} when the record is treated as a CSV header line, when it cannot
 *         be converted into a non-empty column map, or when the model runner returns no result
 * @throws IOException
 *             if loading the models fails
 */
@SuppressWarnings("deprecation")
public Tuple exec(Tuple input) throws IOException {
if (isCsvFormat) {
// In CSV mode, a record whose first column equals the first header name is treated as the header line.
String firstCol = ((input.get(0) == null) ? "" : input.get(0).toString());
if (this.headers[0].equals(CommonUtils.normColumnName(firstCol))) {
// TODO what to do if the column value == column name? ...
return null;
}
}
long start = System.currentTimeMillis();
if (this.modelRunner == null) {
// Initialize modelRunner here instead of in the constructor to avoid OOM on the client side:
// the Pig client also instantiates this UDF just to read metadata.
List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelConfig, evalConfig, evalConfig.getDataSet().getSource(), evalConfig.getGbtConvertToProb(), evalConfig.getGbtScoreConvertStrategy());
this.modelRunner = new ModelRunner(modelConfig, columnConfigList, this.headers, evalConfig.getDataSet().getDataDelimiter(), models, this.outputHiddenLayerIndex, this.isMultiThreadScoring);
List<ModelSpec> subModels = ModelSpecLoaderUtils.loadSubModels(modelConfig, this.columnConfigList, evalConfig, evalConfig.getDataSet().getSource(), evalConfig.getGbtConvertToProb(), evalConfig.getGbtScoreConvertStrategy());
if (CollectionUtils.isNotEmpty(subModels)) {
// Register every sub-model with the runner and remember how many models each one contains.
for (ModelSpec modelSpec : subModels) {
this.modelRunner.addSubModels(modelSpec, this.isMultiThreadScoring);
this.subModelsCnt.put(modelSpec.getModelName(), modelSpec.getModels().size());
}
}
this.modelCnt = models.size();
// reset the model count in the classification case
if (modelConfig.isClassification()) {
if (modelConfig.getTrain().isOneVsAll()) {
if (modelConfig.getTags().size() == 2) {
// one-vs-all with two tags: model count is 1
this.modelCnt = 1;
} else {
// one-vs-all with more than two tags: one model per tag
this.modelCnt = modelConfig.getTags().size();
}
} else {
if (modelConfig.getTags().size() == 2) {
// native binary
this.modelCnt = 1;
} else {
// native multi-class classification: model count is capped at the bagging number
this.modelCnt = (this.modelCnt >= modelConfig.getBaggingNum() ? modelConfig.getBaggingNum() : this.modelCnt);
}
}
// Keep only the first modelCnt models and rebuild the runner with them.
// NOTE(review): this replaces the runner that the sub-models above were added to, and subList throws
// IndexOutOfBoundsException if modelCnt exceeds the number of loaded models - confirm both are intended.
models = models.subList(0, this.modelCnt);
this.modelRunner = new ModelRunner(modelConfig, columnConfigList, this.headers, evalConfig.getDataSet().getDataDelimiter(), models, this.outputHiddenLayerIndex, this.isMultiThreadScoring);
}
this.modelRunner.setScoreScale(Integer.parseInt(this.scale));
log.info("DEBUG: model cnt " + this.modelCnt + " sub models cnt " + modelRunner.getSubModelsCnt());
}
// Convert the raw tuple into a (namespaced column -> value) map; an empty map means nothing to score.
Map<NSColumn, String> rawDataNsMap = CommonUtils.convertDataIntoNsMap(input, this.headers, this.segFilterSize);
if (MapUtils.isEmpty(rawDataNsMap)) {
return null;
}
String tag = CommonUtils.trimTag(rawDataNsMap.get(new NSColumn(modelConfig.getTargetColumnName(evalConfig))));
// filter invalid tag record out
// disable the tag check, since there is no bad tag in eval data set
// and user just want to score the data, but don't run performance evaluation
/*
* if(!tagSet.contains(tag)) {
* if(System.currentTimeMillis() % 100 == 0) {
* log.warn("Invalid tag: " + tag);
* }
* if(isPigEnabled(Constants.SHIFU_GROUP_COUNTER, "INVALID_TAG")) {
* PigStatusReporter.getInstance().getCounter(Constants.SHIFU_GROUP_COUNTER, Constants.COUNTER_RECORDS)
* .increment(1);
* }
* return null;
* }
*/
// Time the scoring call; runInterval is the elapsed time in microseconds (nanoseconds / 1000).
long startTime = System.nanoTime();
CaseScoreResult cs = modelRunner.computeNsData(rawDataNsMap);
long runInterval = (System.nanoTime() - startTime) / 1000L;
if (cs == null) {
// Sampled warning: only logged when the current millisecond timestamp is a multiple of 100.
if (System.currentTimeMillis() % 100 == 0) {
log.warn("Get null result, for input: " + input.toDelimitedString("|"));
}
return null;
}
Tuple tuple = TupleFactory.getInstance().newTuple();
tuple.append(tag);
// Use the configured weight column when present, otherwise fall back to a weight of "1.0".
String weight = null;
if (StringUtils.isNotBlank(evalConfig.getDataSet().getWeightColumnName())) {
weight = rawDataNsMap.get(new NSColumn(evalConfig.getDataSet().getWeightColumnName()));
} else {
weight = "1.0";
}
incrementTagCounters(tag, weight, runInterval);
Map<String, CaseScoreResult> subModelScores = cs.getSubModelScores();
tuple.append(weight);
if (this.isLinearTarget || modelConfig.isRegression()) {
// Linear target or regression: append model scores, optionally the hidden layer output, then sub-model scores.
if (CollectionUtils.isNotEmpty(cs.getScores())) {
appendModelScore(tuple, cs, true);
if (this.outputHiddenLayerIndex != 0) {
appendFirstHiddenOutputScore(tuple, cs.getHiddenLayerScores(), true);
}
}
if (MapUtils.isNotEmpty(subModelScores)) {
Iterator<Map.Entry<String, CaseScoreResult>> iterator = subModelScores.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry<String, CaseScoreResult> entry = iterator.next();
CaseScoreResult subCs = entry.getValue();
appendModelScore(tuple, subCs, false);
}
}
} else {
// Otherwise: append the simple scores followed by the tag predicted by mcPredictor, then sub-model scores.
if (CollectionUtils.isNotEmpty(cs.getScores())) {
appendSimpleScore(tuple, cs);
tuple.append(this.mcPredictor.predictTag(cs).getTag());
}
if (MapUtils.isNotEmpty(subModelScores)) {
Iterator<Map.Entry<String, CaseScoreResult>> iterator = subModelScores.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry<String, CaseScoreResult> entry = iterator.next();
CaseScoreResult subCs = entry.getValue();
appendSimpleScore(tuple, subCs);
}
}
}
// append meta data
List<String> metaColumns = evalConfig.getAllMetaColumns(modelConfig);
if (CollectionUtils.isNotEmpty(metaColumns)) {
for (String meta : metaColumns) {
tuple.append(rawDataNsMap.get(new NSColumn(meta)));
}
}
// Sampled timing log: only emitted when the current millisecond timestamp is a multiple of 1000.
if (System.currentTimeMillis() % 1000 == 0L) {
log.info("running time is " + (System.currentTimeMillis() - start) + " ms.");
}
return tuple;
}
Use of org.encog.ml.BasicML in project shifu by ShifuML.
Class ModelSpecLoaderUtils, method loadBasicModels.
/**
 * Locate the basic model files for the given configuration and load each of them.
 *
 * @param modelConfig
 *            ModelConfig
 * @param evalConfig
 *            eval configuration
 * @param sourceType
 *            source type, used to resolve the file system the models are read from
 * @return the loaded models, in the order their files were located; empty if no model file was located
 * @throws IOException
 *             Exception when fail to locate or load models
 */
public static List<BasicML> loadBasicModels(ModelConfig modelConfig, EvalConfig evalConfig, SourceType sourceType) throws IOException {
    final FileSystem fileSystem = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    final List<FileStatus> located = locateBasicModels(modelConfig, evalConfig, sourceType);

    final List<BasicML> loaded = new ArrayList<BasicML>();
    if (CollectionUtils.isEmpty(located)) {
        // nothing located: hand back the empty list
        return loaded;
    }

    for (FileStatus status : located) {
        loaded.add(loadModel(modelConfig, status.getPath(), fileSystem));
    }
    return loaded;
}
Use of org.encog.ml.BasicML in project shifu by ShifuML.
Class ModelSpecLoaderUtils, method loadBasicModels.
/**
 * Load models of the given algorithm from a local directory.
 *
 * <p>
 * Only files directly inside {@code modelsPath} whose name ends with {@code "." + alg.name().toLowerCase()}
 * (for example {@code .nn}) are considered. Files are loaded in ascending file-name order.
 *
 * @param modelsPath
 *            - a local directory that contains the model files; a path that cannot be listed as a directory
 *            results in an {@link IOException}
 * @param alg
 *            the algorithm; must not be null and must not be {@code DT}
 * @param isConvertToProb
 *            if convert to prob for gbt model
 * @param gbtScoreConvertStrategy
 *            specify how to convert gbt raw score
 * @return - a list of @BasicML, ordered by model file name
 * @throws IOException
 *             - throw exception when the directory cannot be listed or when loading model files fails
 * @throws IllegalArgumentException
 *             - if {@code modelsPath} or {@code alg} is null, or {@code alg} is {@code DT}
 */
public static List<BasicML> loadBasicModels(final String modelsPath, final ALGORITHM alg, boolean isConvertToProb, String gbtScoreConvertStrategy) throws IOException {
    // Report each invalid argument with a message that names the actual problem.
    if (modelsPath == null) {
        throw new IllegalArgumentException("The model path shouldn't be null");
    }
    if (alg == null) {
        throw new IllegalArgumentException("The algorithm shouldn't be null");
    }
    if (ALGORITHM.DT.equals(alg)) {
        throw new IllegalArgumentException("The algorithm " + alg + " is not supported when loading basic models");
    }

    // we have to register PersistBasicFloatNetwork for loading such models
    if (ALGORITHM.NN.equals(alg)) {
        PersistorRegistry.getInstance().add(new PersistBasicFloatNetwork());
    }

    // Computed once instead of on every accept() call.
    final String modelFileSuffix = "." + alg.name().toLowerCase();
    File modelsPathDir = new File(modelsPath);
    File[] modelFiles = modelsPathDir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(modelFileSuffix);
        }
    });

    // listFiles returns null when the path is not a directory or an I/O error occurs.
    if (modelFiles == null) {
        throw new IOException(String.format("Failed to list files in %s", modelsPathDir.getAbsolutePath()));
    }

    // sort file names
    Arrays.sort(modelFiles, new Comparator<File>() {
        @Override
        public int compare(File from, File to) {
            return from.getName().compareTo(to.getName());
        }
    });

    List<BasicML> models = new ArrayList<BasicML>(modelFiles.length);
    for (File modelFile : modelFiles) {
        InputStream is = null;
        try {
            is = new FileInputStream(modelFile);
            if (ALGORITHM.NN.equals(alg)) {
                // NN models are loaded through NNModel when gzip is detected, otherwise through Encog persistence.
                GzipStreamPair pair = GzipStreamPair.isGZipFormat(is);
                if (pair.isGzip()) {
                    models.add(BasicML.class.cast(NNModel.loadFromStream(pair.getInput())));
                } else {
                    models.add(BasicML.class.cast(EncogDirectoryPersistence.loadObject(pair.getInput())));
                }
            } else if (ALGORITHM.LR.equals(alg)) {
                models.add(LR.loadFromStream(is));
            } else if (ALGORITHM.GBT.equals(alg) || ALGORITHM.RF.equals(alg)) {
                models.add(TreeModel.loadFromStream(is, isConvertToProb, gbtScoreConvertStrategy));
            }
            // Any other algorithm: the file is opened and closed but contributes no model.
        } finally {
            IOUtils.closeQuietly(is);
        }
    }
    return models;
}
Use of org.encog.ml.BasicML in project shifu by ShifuML.
Class CommonUtils, method computeTreeModelFeatureImportance.
/**
 * Collect the feature importances of every tree model in the list and merge them into one result.
 *
 * <p>
 * Entries of {@code models} that are not a {@link TreeModel} are ignored.
 *
 * @param models
 *            the bagging models; only instances of TreeModel contribute
 * @return merged feature importance keyed by column id
 * @throws IllegalStateException
 *             if {@code models} contains no TreeModel at all
 */
public static Map<Integer, MutablePair<String, Double>> computeTreeModelFeatureImportance(List<BasicML> models) {
    final List<Map<Integer, MutablePair<String, Double>>> perModelImportances = new ArrayList<Map<Integer, MutablePair<String, Double>>>();

    for (BasicML candidate : models) {
        if (!(candidate instanceof TreeModel)) {
            // not a tree model, nothing to contribute
            continue;
        }
        perModelImportances.add(((TreeModel) candidate).getFeatureImportances());
    }

    if (perModelImportances.isEmpty()) {
        throw new IllegalStateException("Feature importance calculation abort due to no tree model found!!");
    }
    return mergeImportanceList(perModelImportances);
}
Aggregations