Search in sources :

Example 11 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class CommonUtilsTest method syncTest.

/**
 * Exercises {@code CommonUtils.copyConfFromLocalToHDFS} against a LOCAL source and checks that
 * the model-set layout is mirrored under {@code ModelSets/testModel}.
 *
 * <p>The test builds its fixture in the current working directory (ModelConfig.json,
 * ColumnConfig.json, {@code models/model1.nn} and {@code EvalSets/test/EvalConfig.json}),
 * runs the copy, and asserts that each expected target file exists.
 *
 * <p>Fixture files are removed in a {@code finally} block so that a failing assertion does not
 * leave stale files behind for later tests.
 *
 * @throws IOException
 *             if a fixture file cannot be written
 */
// @Test
public void syncTest() throws IOException {
    try {
        ModelConfig config = ModelConfig.createInitModelConfig(".", ALGORITHM.NN, "test", false);
        config.setModelSetName("testModel");
        // ModelConfig.json is written before the source type is changed, as in the original flow.
        jsonMapper.writerWithDefaultPrettyPrinter().writeValue(new File("ModelConfig.json"), config);

        ColumnConfig col = new ColumnConfig();
        col.setColumnName("ColumnA");
        List<ColumnConfig> columnConfigList = new ArrayList<ColumnConfig>();
        columnConfigList.add(col);
        config.getDataSet().setSource(SourceType.LOCAL);
        jsonMapper.writerWithDefaultPrettyPrinter().writeValue(new File("ColumnConfig.json"), columnConfigList);

        File modelsDir = new File("models");
        if (!modelsDir.exists()) {
            FileUtils.forceMkdir(modelsDir);
        }
        File modelFile = new File("models/model1.nn");
        if (!modelFile.exists()) {
            if (modelFile.createNewFile()) {
                // try-with-resources closes the writer even when write() throws
                try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(modelFile), Constants.DEFAULT_CHARSET))) {
                    writer.write("test string");
                }
            } else {
                LOG.warn("Create file {} failed", modelFile.getAbsolutePath());
            }
        }

        File evalDir = new File("EvalSets/test");
        if (!evalDir.exists()) {
            FileUtils.forceMkdir(evalDir);
        }
        File evalConfigFile = new File("EvalSets/test/EvalConfig.json");
        if (!evalConfigFile.exists()) {
            try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(evalConfigFile), Constants.DEFAULT_CHARSET))) {
                writer.write("test string");
            }
        }

        CommonUtils.copyConfFromLocalToHDFS(config, new PathFinder(config));

        // Every path below must exist after the copy.
        String[] expectedPaths = {
                "ModelSets",
                "ModelSets/testModel",
                "ModelSets/testModel/ModelConfig.json",
                "ModelSets/testModel/ColumnConfig.json",
                "ModelSets/testModel/ReasonCodeMap.json",
                "ModelSets/testModel/models/model1.nn",
                "ModelSets/testModel/EvalSets/test/EvalConfig.json" };
        for (String expectedPath : expectedPaths) {
            Assert.assertTrue(new File(expectedPath).exists());
        }
    } finally {
        // Best-effort cleanup: deleteQuietly handles files and directories and never throws,
        // so it cannot mask an assertion failure raised above.
        FileUtils.deleteQuietly(new File("ModelSets"));
        FileUtils.deleteQuietly(new File("ColumnConfig.json"));
        FileUtils.deleteQuietly(new File("ModelConfig.json"));
        FileUtils.deleteQuietly(new File("models"));
        FileUtils.deleteQuietly(new File("EvalSets"));
    }
}
Also used : ModelConfig(ml.shifu.shifu.container.obj.ModelConfig) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) OutputStreamWriter(java.io.OutputStreamWriter) PathFinder(ml.shifu.shifu.fs.PathFinder) File(java.io.File) BufferedWriter(java.io.BufferedWriter)

Example 12 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class ModelSpecLoaderUtils method findModels.

/**
 * Find the model files for a {@link ModelConfig}.
 *
 * <p>Only files whose name ends with the lower-cased algorithm name (for example {@code .nn})
 * are returned. The directory that is searched depends on {@code evalConfig}:
 * <ul>
 * <li>if {@code evalConfig} is null or its models path is blank, the models path resolved by
 * {@link PathFinder} for {@code sourceType} is listed;</li>
 * <li>otherwise the models path from {@code evalConfig} is expanded as a glob and every
 * matching path is listed.</li>
 * </ul>
 *
 * @param modelConfig
 *            - {@link ModelConfig}, need this, since the model file may exist in HDFS
 * @param evalConfig
 *            - {@link EvalConfig}, maybe null
 * @param sourceType
 *            - which file system to look in
 * @return - list of {@link FileStatus} for all found models; empty if the glob matches nothing
 * @throws IOException
 *             io exception to load files
 */
public static List<FileStatus> findModels(ModelConfig modelConfig, EvalConfig evalConfig, SourceType sourceType) throws IOException {
    FileSystem fs = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    PathFinder pathFinder = new PathFinder(modelConfig);
    // If the algorithm in ModelConfig is NN, we only load NN models; the same as SVM, LR.
    // Locale.ROOT keeps the suffix independent of the JVM default locale (under a Turkish
    // locale, for instance, an upper-case 'I' would otherwise lower-case to a dotless 'ı').
    String modelSuffix = "." + modelConfig.getAlgorithm().toLowerCase(java.util.Locale.ROOT);
    List<FileStatus> fileList = new ArrayList<>();
    if (null == evalConfig || StringUtils.isBlank(evalConfig.getModelsPath())) {
        Path path = new Path(pathFinder.getModelsPath(sourceType));
        fileList.addAll(Arrays.asList(fs.listStatus(path, new FileSuffixPathFilter(modelSuffix))));
    } else {
        String modelsPath = evalConfig.getModelsPath();
        FileStatus[] expandedPaths = fs.globStatus(new Path(modelsPath));
        if (ArrayUtils.isNotEmpty(expandedPaths)) {
            for (FileStatus fileStatus : expandedPaths) {
                // list all files with the model suffix under each expanded path
                fileList.addAll(Arrays.asList(fs.listStatus(fileStatus.getPath(), new FileSuffixPathFilter(modelSuffix))));
            }
        }
    }
    return fileList;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) PathFinder(ml.shifu.shifu.fs.PathFinder)

Example 13 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class ModelSpecLoaderUtils method loadSubModels.

/**
 * Load sub-models under current model space.
 *
 * <p>Every directory directly under the models path is handed to {@code loadSubModelSpec};
 * non-null results are collected. The models path comes from {@code evalConfig} when it
 * specifies a non-blank one, otherwise from {@link PathFinder} for {@code sourceType}.
 *
 * <p>Loading is best-effort: an {@link IOException} is logged and the specs collected so far
 * (possibly none) are returned.
 *
 * @param modelConfig
 *            - {@link ModelConfig}, need this, since the model file may exist in HDFS
 * @param columnConfigList
 *            - List of {@link ColumnConfig}
 * @param evalConfig
 *            - {@link EvalConfig}, maybe null
 * @param sourceType
 *            - {@link SourceType}, HDFS or Local?
 * @param gbtConvertToProb
 *            - convert to probability or not for gbt model
 * @param gbtScoreConvertStrategy
 *            - gbt score conversion strategy
 * @return list of {@link ModelSpec} for sub models, never null
 */
@SuppressWarnings("deprecation")
public static List<ModelSpec> loadSubModels(ModelConfig modelConfig, List<ColumnConfig> columnConfigList, EvalConfig evalConfig, RawSourceData.SourceType sourceType, Boolean gbtConvertToProb, String gbtScoreConvertStrategy) {
    List<ModelSpec> modelSpecs = new ArrayList<ModelSpec>();
    FileSystem fs = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    // we have to register PersistBasicFloatNetwork for loading such models
    PersistorRegistry.getInstance().add(new PersistBasicFloatNetwork());
    PathFinder pathFinder = new PathFinder(modelConfig);
    String modelsPath = null;
    // isBlank (not isEmpty) so that a whitespace-only models path falls back to the default
    // location, consistent with findModels and findGenericModels.
    if (evalConfig == null || StringUtils.isBlank(evalConfig.getModelsPath())) {
        modelsPath = pathFinder.getModelsPath(sourceType);
    } else {
        modelsPath = evalConfig.getModelsPath();
    }
    try {
        FileStatus[] fsArr = fs.listStatus(new Path(modelsPath));
        for (FileStatus fileStatus : fsArr) {
            // only sub-directories are treated as sub-model spaces
            if (fileStatus.isDir()) {
                ModelSpec modelSpec = loadSubModelSpec(modelConfig, columnConfigList, fileStatus, sourceType, gbtConvertToProb, gbtScoreConvertStrategy);
                if (modelSpec != null) {
                    modelSpecs.add(modelSpec);
                }
            }
        }
    } catch (IOException e) {
        log.error("Error occurred when loading sub-models.", e);
    }
    return modelSpecs;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) PathFinder(ml.shifu.shifu.fs.PathFinder) ModelSpec(ml.shifu.shifu.core.model.ModelSpec) PersistBasicFloatNetwork(ml.shifu.shifu.core.dtrain.dataset.PersistBasicFloatNetwork)

Example 14 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class ModelSpecLoaderUtils method findGenericModels.

/**
 * Locate the generic model config files, i.e. the files ending in {@code .json}.
 * The lookup location is chosen the same way as in
 * {@link #findModels(ModelConfig, EvalConfig, RawSourceData.SourceType)}: the models path of
 * {@code evalConfig} (expanded as a glob) when it is set and non-blank, otherwise the models
 * path resolved by {@link PathFinder}.
 *
 * @param modelConfig
 *            - {@link ModelConfig}, need this, since the model file may exist in HDFS
 * @param evalConfig
 *            - {@link EvalConfig}, maybe null
 * @param sourceType
 *            - {@link SourceType}, HDFS or Local?
 * @return the file status list for generic models
 * @throws IOException
 *             Exception occurred when finding generic models
 */
public static List<FileStatus> findGenericModels(ModelConfig modelConfig, EvalConfig evalConfig, RawSourceData.SourceType sourceType) throws IOException {
    FileSystem fileSystem = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    PathFinder finder = new PathFinder(modelConfig);
    // Generic model config files are identified by the .json suffix
    String jsonSuffix = ".json";
    List<FileStatus> genericModels = new ArrayList<>();

    boolean evalPathMissing = (null == evalConfig) || StringUtils.isBlank(evalConfig.getModelsPath());
    if (evalPathMissing) {
        // No eval-specific location: list the default models directory
        Path defaultModelsDir = new Path(finder.getModelsPath(sourceType));
        FileStatus[] found = fileSystem.listStatus(defaultModelsDir, new FileSuffixPathFilter(jsonSuffix));
        genericModels.addAll(Arrays.asList(found));
        return genericModels;
    }

    // Eval-specific location: it may be a glob, so expand it first
    String evalModelsPath = evalConfig.getModelsPath();
    FileStatus[] globMatches = fileSystem.globStatus(new Path(evalModelsPath));
    if (!ArrayUtils.isNotEmpty(globMatches)) {
        return genericModels;
    }
    for (FileStatus match : globMatches) {
        // list all files with suffix under this match
        FileStatus[] found = fileSystem.listStatus(match.getPath(), new FileSuffixPathFilter(jsonSuffix));
        genericModels.addAll(Arrays.asList(found));
    }
    return genericModels;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) PathFinder(ml.shifu.shifu.fs.PathFinder)

Example 15 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class ConfusionMatrix method getEvalScoreHeader.

/**
 * Read the column headers of the evaluation score output.
 *
 * <p>When the eval score path is a directory, the headers come from the separate eval score
 * header path (the .pig_header file); otherwise they come from the eval score file itself.
 *
 * @return the headers returned by {@code CommonUtils.getHeaders} for the chosen path
 * @throws IOException
 *             if the path check or the header read fails
 */
private String[] getEvalScoreHeader() throws IOException {
    PathFinder finder = new PathFinder(modelConfig);
    SourceType source = evalConfig.getDataSet().getSource();
    String headerLocation = ShifuFileUtils.isDir(finder.getEvalScorePath(evalConfig, source), source)
            // directory output: find the .pig_header file
            ? finder.getEvalScoreHeaderPath(evalConfig, source)
            // single file output: the evaluation data file itself
            : finder.getEvalScorePath(evalConfig, source);
    return CommonUtils.getHeaders(headerLocation, this.delimiter, source, false);
}
Also used : SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) PathFinder(ml.shifu.shifu.fs.PathFinder)

Aggregations

PathFinder (ml.shifu.shifu.fs.PathFinder)20 Path (org.apache.hadoop.fs.Path)7 FileSystem (org.apache.hadoop.fs.FileSystem)6 File (java.io.File)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig)3 ShifuException (ml.shifu.shifu.exception.ShifuException)3 JsonIgnore (com.fasterxml.jackson.annotation.JsonIgnore)2 BufferedReader (java.io.BufferedReader)2 ArrayList (java.util.ArrayList)2 Scanner (java.util.Scanner)2 ModelConfig (ml.shifu.shifu.container.obj.ModelConfig)2 AkkaActorInputMessage (ml.shifu.shifu.message.AkkaActorInputMessage)2 ExceptionMessage (ml.shifu.shifu.message.ExceptionMessage)2 StatsResultMessage (ml.shifu.shifu.message.StatsResultMessage)2 BufferedWriter (java.io.BufferedWriter)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 OutputStreamWriter (java.io.OutputStreamWriter)1 Writer (java.io.Writer)1