use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class CommonUtilsTest method syncTest.
/**
 * Exercises {@code CommonUtils.copyConfFromLocalToHDFS} against a local workspace.
 * <p>
 * The test writes {@code ModelConfig.json}, {@code ColumnConfig.json}, {@code models/model1.nn}
 * and {@code EvalSets/test/EvalConfig.json} into the working directory, runs the copy with a
 * {@code LOCAL} source type, and then asserts that the expected files exist under
 * {@code ModelSets/testModel}.
 * <p>
 * All files and directories touched by the test are removed in a {@code finally} block so that
 * a failing assertion does not leave artifacts behind for other tests.
 * <p>
 * NOTE: the {@code @Test} annotation below is commented out, so this method is currently not
 * picked up by the test runner.
 *
 * @throws IOException
 *             if any fixture file cannot be written or the copy itself fails
 */
// @Test
public void syncTest() throws IOException {
    try {
        ModelConfig config = ModelConfig.createInitModelConfig(".", ALGORITHM.NN, "test", false);
        config.setModelSetName("testModel");
        jsonMapper.writerWithDefaultPrettyPrinter().writeValue(new File("ModelConfig.json"), config);

        ColumnConfig col = new ColumnConfig();
        col.setColumnName("ColumnA");
        List<ColumnConfig> columnConfigList = new ArrayList<ColumnConfig>();
        columnConfigList.add(col);
        config.getDataSet().setSource(SourceType.LOCAL);
        jsonMapper.writerWithDefaultPrettyPrinter().writeValue(new File("ColumnConfig.json"), columnConfigList);

        File modelsDir = new File("models");
        if (!modelsDir.exists()) {
            FileUtils.forceMkdir(modelsDir);
        }

        File modelFile = new File("models/model1.nn");
        if (!modelFile.exists()) {
            if (modelFile.createNewFile()) {
                // try-with-resources closes the stream even when write() throws
                try (BufferedWriter writer = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(modelFile), Constants.DEFAULT_CHARSET))) {
                    writer.write("test string");
                }
            } else {
                LOG.warn("Create file {} failed", modelFile.getAbsolutePath());
            }
        }

        File evalDir = new File("EvalSets/test");
        if (!evalDir.exists()) {
            FileUtils.forceMkdir(evalDir);
        }

        File evalConfigFile = new File("EvalSets/test/EvalConfig.json");
        if (!evalConfigFile.exists()) {
            try (BufferedWriter writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(evalConfigFile), Constants.DEFAULT_CHARSET))) {
                writer.write("test string");
            }
        }

        CommonUtils.copyConfFromLocalToHDFS(config, new PathFinder(config));

        // Every one of these must exist after the copy; order matches the original checks.
        String[] expectedPaths = {
                "ModelSets",
                "ModelSets/testModel",
                "ModelSets/testModel/ModelConfig.json",
                "ModelSets/testModel/ColumnConfig.json",
                "ModelSets/testModel/ReasonCodeMap.json",
                "ModelSets/testModel/models/model1.nn",
                "ModelSets/testModel/EvalSets/test/EvalConfig.json" };
        for (String expectedPath : expectedPaths) {
            Assert.assertTrue(new File(expectedPath).exists());
        }
    } finally {
        // deleteQuietly never throws, so a cleanup problem cannot mask the real test failure.
        FileUtils.deleteQuietly(new File("ModelSets"));
        FileUtils.deleteQuietly(new File("ColumnConfig.json"));
        FileUtils.deleteQuietly(new File("ModelConfig.json"));
        FileUtils.deleteQuietly(new File("models"));
        FileUtils.deleteQuietly(new File("EvalSets"));
    }
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class ModelSpecLoaderUtils method findModels.
/**
 * Collect the model files that belong to the given model set.
 * <p>
 * Where the files are looked up depends on {@code evalConfig}:
 * <ul>
 * <li>if {@code evalConfig} is null or its models path is blank, the models path that
 * {@link PathFinder} resolves for {@code sourceType} is listed;</li>
 * <li>otherwise the models path from {@code evalConfig} is glob-expanded and every matching path
 * is listed.</li>
 * </ul>
 * In both cases the listing is restricted by a {@link FileSuffixPathFilter} built from
 * {@code "."} plus the lower-cased algorithm name of {@code modelConfig}.
 *
 * @param modelConfig
 *            - {@link ModelConfig} supplying the algorithm name and the default models path
 * @param evalConfig
 *            - {@link EvalConfig}, may be null
 * @param sourceType
 *            - which file system to search
 * @return list of {@link FileStatus} for all model files found; empty when the glob matches nothing
 * @throws IOException
 *             if listing or globbing the file system fails
 */
public static List<FileStatus> findModels(ModelConfig modelConfig, EvalConfig evalConfig, SourceType sourceType) throws IOException {
    FileSystem fileSystem = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    PathFinder finder = new PathFinder(modelConfig);
    // Only files of the configured algorithm are wanted, e.g. NN models for an NN model set.
    String suffix = "." + modelConfig.getAlgorithm().toLowerCase();
    List<FileStatus> found = new ArrayList<>();

    boolean useDefaultLocation = (evalConfig == null) || StringUtils.isBlank(evalConfig.getModelsPath());
    if (useDefaultLocation) {
        Path modelsDir = new Path(finder.getModelsPath(sourceType));
        FileStatus[] listed = fileSystem.listStatus(modelsDir, new FileSuffixPathFilter(suffix));
        found.addAll(Arrays.asList(listed));
        return found;
    }

    // The eval-specific models path may contain glob patterns; expand it first.
    FileStatus[] matches = fileSystem.globStatus(new Path(evalConfig.getModelsPath()));
    if (matches == null) {
        return found;
    }
    for (FileStatus match : matches) {
        FileStatus[] listed = fileSystem.listStatus(match.getPath(), new FileSuffixPathFilter(suffix));
        found.addAll(Arrays.asList(listed));
    }
    return found;
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class ModelSpecLoaderUtils method loadSubModels.
/**
 * Load the sub-models found directly under the models location of the current model set.
 * <p>
 * The models location is the models path of {@code evalConfig} when that is set and non-empty,
 * otherwise the path {@link PathFinder} resolves for {@code sourceType}. Each directory entry in
 * that location is handed to {@code loadSubModelSpec}; non-null results are collected.
 * <p>
 * An {@link IOException} raised while listing or loading is logged and not rethrown, so the
 * returned list then contains only the sub-models loaded up to that point.
 *
 * @param modelConfig
 *            - {@link ModelConfig} of the parent model set
 * @param columnConfigList
 *            - list of {@link ColumnConfig}
 * @param evalConfig
 *            - {@link EvalConfig}, may be null
 * @param sourceType
 *            - {@link SourceType} selecting the file system
 * @param gbtConvertToProb
 *            - passed through to {@code loadSubModelSpec}; convert to probability or not for gbt model
 * @param gbtScoreConvertStrategy
 *            - passed through to {@code loadSubModelSpec}; gbt score conversion strategy
 * @return list of {@link ModelSpec} for the sub-models, possibly empty
 */
@SuppressWarnings("deprecation")
public static List<ModelSpec> loadSubModels(ModelConfig modelConfig, List<ColumnConfig> columnConfigList, EvalConfig evalConfig, RawSourceData.SourceType sourceType, Boolean gbtConvertToProb, String gbtScoreConvertStrategy) {
    List<ModelSpec> subModels = new ArrayList<ModelSpec>();
    FileSystem fileSystem = ShifuFileUtils.getFileSystemBySourceType(sourceType);

    // PersistBasicFloatNetwork has to be registered before such models can be loaded.
    PersistorRegistry.getInstance().add(new PersistBasicFloatNetwork());

    PathFinder finder = new PathFinder(modelConfig);
    boolean noEvalModelsPath = (evalConfig == null) || StringUtils.isEmpty(evalConfig.getModelsPath());
    String modelsRoot = noEvalModelsPath ? finder.getModelsPath(sourceType) : evalConfig.getModelsPath();

    try {
        for (FileStatus entry : fileSystem.listStatus(new Path(modelsRoot))) {
            // Only directories are treated as sub-models; plain files are skipped.
            if (!entry.isDir()) {
                continue;
            }
            ModelSpec spec = loadSubModelSpec(modelConfig, columnConfigList, entry, sourceType, gbtConvertToProb, gbtScoreConvertStrategy);
            if (spec == null) {
                continue;
            }
            subModels.add(spec);
        }
    } catch (IOException e) {
        log.error("Error occurred when loading sub-models.", e);
    }
    return subModels;
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class ModelSpecLoaderUtils method findGenericModels.
/**
 * Collect the generic model config files (files with suffix {@code .json}) of a model set.
 * The lookup rules mirror {@link #findModels(ModelConfig, EvalConfig, RawSourceData.SourceType)}:
 * <ul>
 * <li>if {@code evalConfig} is null or its models path is blank, the models path that
 * {@link PathFinder} resolves for {@code sourceType} is listed;</li>
 * <li>otherwise the models path from {@code evalConfig} is glob-expanded and every matching path
 * is listed.</li>
 * </ul>
 *
 * @param modelConfig
 *            - {@link ModelConfig} used to resolve the default models path
 * @param evalConfig
 *            - {@link EvalConfig}, may be null
 * @param sourceType
 *            - {@link SourceType} selecting the file system
 * @return the file status list for generic models; empty when the glob matches nothing
 * @throws IOException
 *             if listing or globbing the file system fails
 */
public static List<FileStatus> findGenericModels(ModelConfig modelConfig, EvalConfig evalConfig, RawSourceData.SourceType sourceType) throws IOException {
    FileSystem fileSystem = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    PathFinder finder = new PathFinder(modelConfig);
    // Generic model configs are identified purely by their .json suffix.
    String suffix = ".json";
    List<FileStatus> found = new ArrayList<>();

    boolean useDefaultLocation = (evalConfig == null) || StringUtils.isBlank(evalConfig.getModelsPath());
    if (useDefaultLocation) {
        Path modelsDir = new Path(finder.getModelsPath(sourceType));
        FileStatus[] listed = fileSystem.listStatus(modelsDir, new FileSuffixPathFilter(suffix));
        found.addAll(Arrays.asList(listed));
        return found;
    }

    // The eval-specific models path may contain glob patterns; expand it first.
    FileStatus[] matches = fileSystem.globStatus(new Path(evalConfig.getModelsPath()));
    if (matches == null) {
        return found;
    }
    for (FileStatus match : matches) {
        FileStatus[] listed = fileSystem.listStatus(match.getPath(), new FileSuffixPathFilter(suffix));
        found.addAll(Arrays.asList(listed));
    }
    return found;
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class ConfusionMatrix method getEvalScoreHeader.
/**
 * Read the header fields of the evaluation score output for the current {@code evalConfig}.
 * <p>
 * If the eval score path is a directory, the headers are read from the separate eval score
 * header path (per the original comment, the {@code .pig_header} file); otherwise they are read
 * from the eval score path itself.
 *
 * @return the headers as returned by {@code CommonUtils.getHeaders}
 * @throws IOException
 *             if the path check or reading the headers fails
 */
private String[] getEvalScoreHeader() throws IOException {
    PathFinder finder = new PathFinder(modelConfig);
    SourceType source = evalConfig.getDataSet().getSource();

    boolean scoreIsDirectory = ShifuFileUtils.isDir(finder.getEvalScorePath(evalConfig, source), source);
    String headerLocation = scoreIsDirectory
            ? finder.getEvalScoreHeaderPath(evalConfig, source) // dedicated header file
            : finder.getEvalScorePath(evalConfig, source); // the evaluation data file itself

    return CommonUtils.getHeaders(headerLocation, this.delimiter, source, false);
}
Aggregations