Search in sources :

Example 11 with EvalConfig

use of ml.shifu.shifu.container.obj.EvalConfig in project shifu by ShifuML.

the class EvalModelProcessor method runEval.

/**
 * Runs evaluation for every eval set in the given list.
 *
 * <p>
 * Data is synced to HDFS once up front, then each eval set's score meta columns are validated (at most 5
 * are allowed; on violation the problem is logged and the method returns without running anything).
 *
 * <p>
 * When {@code Constants.SHIFU_EVAL_PARALLEL} is enabled (default {@code true}), the model config is in
 * map-reduce run mode and there is more than one eval set, the eval sets are run in rounds of at most
 * {@code Constants.SHIFU_EVAL_PARALLEL_NUM} (default 5) threads; otherwise they are run one by one on
 * the calling thread. In the parallel case exceptions raised by a single eval are logged and not
 * propagated to the caller.
 *
 * @param evalSetList
 *            the eval sets to run
 * @throws IOException
 *             any exception raised while syncing data or while running an eval sequentially
 * @throws IllegalArgumentException
 *             if the configured parallel number is not in (0, 100]
 */
private void runEval(List<EvalConfig> evalSetList) throws IOException {
    // do it only once
    syncDataToHdfs(evalSetList);
    // validation for score column
    for (EvalConfig evalConfig : evalSetList) {
        List<String> scoreMetaColumns = evalConfig.getScoreMetaColumns(modelConfig);
        if (scoreMetaColumns.size() > 5) {
            LOG.error("Starting from 0.10.x, 'scoreMetaColumns' is used for benchmark score columns and limited to at most 5.");
            LOG.error("If meta columns are set in file of 'scoreMetaColumns', please move meta column config to 'eval#dataSet#metaColumnNameFile' part.");
            LOG.error("If 'eval#dataSet#metaColumnNameFile' is duplicated with training 'metaColumnNameFile', you can rename it to another file with different name.");
            return;
        }
    }
    if (Environment.getBoolean(Constants.SHIFU_EVAL_PARALLEL, true) && modelConfig.isMapReduceRunMode() && evalSetList.size() > 1) {
        // run in parallel
        int parallelNum = Environment.getInt(Constants.SHIFU_EVAL_PARALLEL_NUM, 5);
        if (parallelNum <= 0 || parallelNum > 100) {
            throw new IllegalArgumentException(Constants.SHIFU_EVAL_PARALLEL_NUM + " in shifuconfig should be in (0, 100], by default it is 5.");
        }
        int evalSize = evalSetList.size();
        int mod = evalSize % parallelNum;
        int batch = evalSize / parallelNum;
        // a partially filled last round still needs its own round
        batch = (mod == 0 ? batch : (batch + 1));
        for (int i = 0; i < batch; i++) {
            // only the last round can be smaller than parallelNum
            int batchSize = (mod != 0 && i == (batch - 1)) ? mod : parallelNum;
            // launch current batch size
            LOG.info("Starting to run eval score in {}/{} round", (i + 1), batch);
            final CountDownLatch cdl = new CountDownLatch(batchSize);
            for (int j = 0; j < batchSize; j++) {
                int currentIndex = i * parallelNum + j;
                final EvalConfig config = evalSetList.get(currentIndex);
                // thread is named after the eval set so each one is easy to find in the logs
                Thread evalRunThread = new Thread(new Runnable() {

                    @Override
                    public void run() {
                        try {
                            runEval(config);
                        } catch (Exception e) {
                            LOG.error("Exception in eval:", e);
                        } finally {
                            // always release the latch, even when an Error escapes, so that the
                            // coordinating thread can never be left waiting forever
                            cdl.countDown();
                        }
                    }
                }, config.getName());
                evalRunThread.start();
                // each one sleep 3s to avoid conflict in initialization
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            }
            LOG.info("Starting to wait eval in {}/{} round", (i + 1), batch);
            // await all threads done
            // NOTE(review): after an interrupt the flag stays set, so later sleep/await calls return
            // immediately and the remaining rounds are started without waiting — confirm this is intended.
            try {
                cdl.await();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
            LOG.info("Finish eval in {}/{} round", (i + 1), batch);
        }
        LOG.info("Finish all eval parallel running with eval size {}.", evalSize);
    } else {
        // for old sequential runs
        for (EvalConfig evalConfig : evalSetList) {
            runEval(evalConfig);
        }
    }
}
Also used : EvalConfig(ml.shifu.shifu.container.obj.EvalConfig) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) IOException(java.io.IOException) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 12 with EvalConfig

use of ml.shifu.shifu.container.obj.EvalConfig in project shifu by ShifuML.

the class EvalModelProcessor method createNewEval.

/**
 * Adds a new evaluation set called <code>name</code> to the model config and saves the config.
 *
 * <p>
 * The new eval set's data set is a clone of the model's raw source data. Two local files are created if
 * missing: the score meta column file and the meta column names file, the latter being registered as the
 * eval data set's meta column name file.
 *
 * @param name
 *            - the evaluation set name
 * @throws IOException
 *             any io exception
 * @throws ShifuException
 *             if an eval set with that name already exists, or the model config cannot be written
 */
private void createNewEval(String name) throws IOException {
    // refuse to overwrite an existing eval set
    if (modelConfig.getEvalConfigByName(name) != null) {
        throw new ShifuException(ShifuErrorCode.ERROR_MODEL_EVALSET_ALREADY_EXIST, "EvalSet - " + name + " already exists in ModelConfig. Please use another evalset name");
    }

    EvalConfig newEval = new EvalConfig();
    newEval.setName(name);
    newEval.setDataSet(modelConfig.getDataSet().cloneRawSourceData());

    // create empty <EvalSetName>Score.meta.column.names
    Path scoreMetaFile = new Path(newEval.getName() + Constants.DEFAULT_CHAMPIONSCORE_META_COLUMN_FILE);
    ShifuFileUtils.createFileIfNotExists(scoreMetaFile.toString(), SourceType.LOCAL);

    // create empty <EvalSetName>.meta.column.names
    String metaFileName = newEval.getName() + "." + Constants.DEFAULT_META_COLUMN_FILE;
    String namesFilePath = Constants.COLUMN_META_FOLDER_NAME + File.separator + metaFileName;
    Path namesFile = new Path(namesFilePath);
    ShifuFileUtils.createFileIfNotExists(namesFile.toString(), SourceType.LOCAL);
    newEval.getDataSet().setMetaColumnNameFile(namesFilePath);

    // register the eval set and persist the updated model config
    modelConfig.getEvals().add(newEval);
    try {
        saveModelConfig();
    } catch (IOException ioe) {
        throw new ShifuException(ShifuErrorCode.ERROR_WRITE_MODELCONFIG, ioe);
    }
    LOG.info("Create Eval - " + name);
}
Also used : EvalConfig(ml.shifu.shifu.container.obj.EvalConfig) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 13 with EvalConfig

use of ml.shifu.shifu.container.obj.EvalConfig in project shifu by ShifuML.

the class CommonUtils method copyConfFromLocalToHDFS.

/**
 * Copies the local model configuration to HDFS.
 *
 * <p>
 * In order: the model set directory is created on HDFS, then ModelConfig, the grid search config (when
 * one is configured), ColumnConfig (when it exists locally), the model version folder, the models folder
 * and finally the data of every local eval set folder that matches an eval config are copied over. The
 * version and models folders on HDFS are deleted before being copied again.
 *
 * @param modelConfig
 *            the model config
 * @param pathFinder
 *            the path finder to locate file
 * @return always {@code true} when no exception is thrown
 *
 * @throws IOException
 *             If any exception on HDFS IO or local IO.
 *
 * @throws NullPointerException
 *             If parameter {@code modelConfig} is null
 */
public static boolean copyConfFromLocalToHDFS(ModelConfig modelConfig, PathFinder pathFinder) throws IOException {
    final FileSystem hdfs = HDFSUtils.getFS();
    final FileSystem localFs = HDFSUtils.getLocalFS();

    // the model set folder is not checked for existence here; removing it is left to the user
    final Path hdfsModelSetDir = new Path(pathFinder.getModelSetPath(SourceType.HDFS));
    hdfs.mkdirs(hdfsModelSetDir);

    // ModelConfig
    hdfs.copyFromLocalFile(new Path(pathFinder.getModelConfigPath(SourceType.LOCAL)),
            new Path(pathFinder.getModelSetPath(SourceType.HDFS)));

    // GridSearch config file, only when one is configured
    final String gridConfigFile = modelConfig.getTrain().getGridConfigFile();
    final boolean hasGridConfig = gridConfigFile != null && !gridConfigFile.trim().isEmpty();
    if (hasGridConfig) {
        hdfs.copyFromLocalFile(new Path(modelConfig.getTrain().getGridConfigFile()),
                new Path(pathFinder.getModelSetPath(SourceType.HDFS)));
    }

    // ColumnConfig, only when it exists locally
    final Path localColumnConfig = new Path(pathFinder.getColumnConfigPath(SourceType.LOCAL));
    final Path hdfsColumnConfig = new Path(pathFinder.getColumnConfigPath(SourceType.HDFS));
    if (ShifuFileUtils.isFileExists(localColumnConfig.toString(), SourceType.LOCAL)) {
        hdfs.copyFromLocalFile(localColumnConfig, hdfsColumnConfig);
    }

    // model version folder: drop the HDFS copy first, then copy the local one into the model set folder
    final Path localVersion = new Path(pathFinder.getModelVersion(SourceType.LOCAL));
    if (localFs.exists(localVersion)) {
        hdfs.delete(new Path(pathFinder.getModelVersion(SourceType.HDFS)), true);
        hdfs.copyFromLocalFile(localVersion, hdfsModelSetDir);
    }

    // models folder: same replace-then-copy treatment
    final Path localModels = new Path(pathFinder.getModelsPath(SourceType.LOCAL));
    if (localFs.exists(localModels)) {
        hdfs.delete(new Path(pathFinder.getModelsPath(SourceType.HDFS)), true);
        hdfs.copyFromLocalFile(localModels, hdfsModelSetDir);
    }

    // eval sets: only local folders whose name matches an eval config are synced
    final Path localEvals = new Path(pathFinder.getEvalsPath(SourceType.LOCAL));
    if (localFs.exists(localEvals)) {
        final FileStatus[] evalFolders = localFs.listStatus(localEvals);
        for (FileStatus evalFolder : evalFolders) {
            final EvalConfig matched = modelConfig.getEvalConfigByName(evalFolder.getPath().getName());
            if (matched == null) {
                continue;
            }
            copyEvalDataFromLocalToHDFS(modelConfig, matched.getName());
        }
    }
    return true;
}
Also used : EvalConfig(ml.shifu.shifu.container.obj.EvalConfig) FileSystem(org.apache.hadoop.fs.FileSystem)

Aggregations

EvalConfig (ml.shifu.shifu.container.obj.EvalConfig)13 IOException (java.io.IOException)5 ShifuException (ml.shifu.shifu.exception.ShifuException)5 ModelConfig (ml.shifu.shifu.container.obj.ModelConfig)3 Test (org.testng.annotations.Test)3 File (java.io.File)2 CountDownLatch (java.util.concurrent.CountDownLatch)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 ValidateResult (ml.shifu.shifu.container.meta.ValidateResult)1 RawSourceData (ml.shifu.shifu.container.obj.RawSourceData)1 PerformanceEvaluator (ml.shifu.shifu.core.PerformanceEvaluator)1 PathFinder (ml.shifu.shifu.fs.PathFinder)1 SourceFile (ml.shifu.shifu.fs.SourceFile)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 Path (org.apache.hadoop.fs.Path)1 AfterTest (org.testng.annotations.AfterTest)1