Search in sources :

Example 6 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class ModelSpecLoaderUtils method loadGenericModels.

/**
 * Builds a {@link GenericModel} for every generic model config file and appends it to
 * {@code models}.
 *
 * <p>
 * Before any config is read, the method checks whether a {@code models} directory exists under
 * the directory named by the {@code Constants.USER_DIR} system property. When it does not, the
 * models path resolved by {@link PathFinder} for {@code sourceType} is copied into that
 * directory. Each config file is then parsed as a {@link GenericModelConfig}, its
 * {@code Constants.GENERIC_MODEL_PATH} property is pointed at the local models directory, and a
 * {@link Computable} is instantiated and initialized with it.
 *
 * @param modelConfig
 *            model config used to resolve the models path
 * @param genericModelConfigs
 *            statuses of the generic model config files to load
 * @param sourceType
 *            source type used to resolve the models path and to read the config files
 * @param models
 *            output list; one model is appended per config file
 * @throws IOException
 *             if copying the models or loading a config fails
 * @throws RuntimeException
 *             if the configured algorithm is not {@code Constants.TENSORFLOW}, or if the
 *             computable cannot be instantiated or initialized (original exception is the cause)
 */
public static void loadGenericModels(ModelConfig modelConfig, List<FileStatus> genericModelConfigs, SourceType sourceType, List<BasicML> models) throws IOException {
    FileSystem remoteFs = HDFSUtils.getFS();
    PathFinder finder = new PathFinder(modelConfig);
    String modelsSource = finder.getModelsPath(sourceType);
    File localModelsDir = new File(System.getProperty(Constants.USER_DIR) + "/models");

    // Only fetch the models when no local copy is present yet.
    if (!localModelsDir.exists()) {
        Path from = new Path(modelsSource);
        Path to = new Path(System.getProperty(Constants.USER_DIR));
        // delSrc = false: keep the source; useRawLocalFileSystem = true.
        remoteFs.copyToLocalFile(false, from, to, true);
    }

    for (FileStatus configStatus : genericModelConfigs) {
        String configLocation = configStatus.getPath().toString();
        GenericModelConfig genericConfig = CommonUtils.loadJSON(configLocation, sourceType, GenericModelConfig.class);
        String algorithm = (String) genericConfig.getProperties().get(Constants.GENERIC_ALGORITHM);

        // <user dir>/<models>; note that no per-model sub-directory is appended.
        String localModelPath = System.getProperty(Constants.USER_DIR) + File.separator + Constants.MODELS;
        genericConfig.getProperties().put(Constants.GENERIC_MODEL_PATH, localModelPath);
        log.info("Generic model path is : {}.", genericConfig.getProperties().get(Constants.GENERIC_MODEL_PATH));

        // Tensorflow is the only algorithm handled here; reject anything else up front.
        if (!Constants.TENSORFLOW.equals(algorithm)) {
            throw new RuntimeException("Algorithm: " + algorithm + " is not supported in generic model yet.");
        }

        try {
            // Reflectively create the evaluator implementation and hand it the config.
            Class<?> computableClass = Class.forName(ComputeImplClass.Tensorflow.getClassName());
            Computable evaluator = (Computable) computableClass.newInstance();
            evaluator.init(genericConfig);
            models.add(new GenericModel(evaluator, genericConfig.getProperties()));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) PathFinder(ml.shifu.shifu.fs.PathFinder) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 7 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class CommonUtils method copyEvalDataFromLocalToHDFS.

/**
 * Copies the evaluation data and its meta column files from the local file system to HDFS.
 *
 * <p>
 * Nothing happens when {@code modelConfig} has no eval config named {@code evalName}. Otherwise
 * the local eval set directory is copied to its HDFS counterpart, but only if the local path is
 * an existing directory and the HDFS destination does not exist yet. The score meta column file
 * and the data set meta column file are each copied to the eval set path whenever their
 * configured names are not blank.
 *
 * @param modelConfig
 *            - ModelConfig holding the eval configuration
 * @param evalName
 *            name of the eval config to look up in {@code modelConfig}
 * @throws IOException
 *             - if a file system check or copy fails
 */
@SuppressWarnings("deprecation")
public static void copyEvalDataFromLocalToHDFS(ModelConfig modelConfig, String evalName) throws IOException {
    EvalConfig evalConfig = modelConfig.getEvalConfigByName(evalName);
    if (evalConfig == null) {
        // unknown eval name: nothing to sync
        return;
    }

    FileSystem remoteFs = HDFSUtils.getFS();
    FileSystem localFs = HDFSUtils.getLocalFS();
    PathFinder finder = new PathFinder(modelConfig);
    Path localEvalDir = new Path(finder.getEvalSetPath(evalConfig, SourceType.LOCAL));
    Path hdfsEvalDir = new Path(finder.getEvalSetPath(evalConfig, SourceType.HDFS));

    // copy the eval folder only when it is a local directory that HDFS does not have yet
    boolean localDirPresent = localFs.exists(localEvalDir) && localFs.getFileStatus(localEvalDir).isDir();
    if (localDirPresent && !remoteFs.exists(hdfsEvalDir)) {
        remoteFs.copyFromLocalFile(localEvalDir, hdfsEvalDir);
    }

    // sync the score meta column file
    String scoreMetaFile = evalConfig.getScoreMetaColumnNameFile();
    if (StringUtils.isNotBlank(scoreMetaFile)) {
        Path target = new Path(finder.getEvalSetPath(evalConfig));
        remoteFs.copyFromLocalFile(new Path(scoreMetaFile), target);
    }

    // sync the evaluation data set meta column file
    String dataMetaFile = evalConfig.getDataSet().getMetaColumnNameFile();
    if (StringUtils.isNotBlank(dataMetaFile)) {
        Path target = new Path(finder.getEvalSetPath(evalConfig));
        remoteFs.copyFromLocalFile(new Path(dataMetaFile), target);
    }
}
Also used : EvalConfig(ml.shifu.shifu.container.obj.EvalConfig) FileSystem(org.apache.hadoop.fs.FileSystem) PathFinder(ml.shifu.shifu.fs.PathFinder)

Example 8 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class CommonUtils method getPigParamMap.

/**
 * Assembles the parameter map handed to pig scripts.
 *
 * <p>
 * The map holds the parallelism setting, the jar path, the raw/normalized/selected data paths,
 * the stats, binning and PSI paths, the model and column config paths, the score paths, the
 * source type and the job queue. Most paths are resolved through {@link PathFinder} for the
 * given {@code sourceType}.
 *
 * @param modelConfig
 *            model config
 * @param sourceType
 *            source type used to resolve the paths
 * @return a new mutable map of pig parameter names to values
 * @throws IOException
 *             any io exception
 * @throws IllegalArgumentException
 *             if modelConfig is null.
 */
public static Map<String, String> getPigParamMap(ModelConfig modelConfig, SourceType sourceType) throws IOException {
    if (modelConfig == null) {
        throw new IllegalArgumentException("modelConfig should not be null.");
    }

    PathFinder finder = new PathFinder(modelConfig);
    Map<String, String> params = new HashMap<String, String>();

    // Execution settings; 400 is passed as the fallback parallelism.
    String numParallel = Environment.getInt(Environment.HADOOP_NUM_PARALLEL, 400).toString();
    params.put(Constants.NUM_PARALLEL, numParallel);
    String jarPath = finder.getJarPath();
    log.info("jar path is {}", jarPath);
    params.put(Constants.PATH_JAR, jarPath);

    // Input data and normalization output.
    params.put(Constants.PATH_RAW_DATA, modelConfig.getDataSetRawPath());
    params.put(Constants.PATH_NORMALIZED_DATA, finder.getNormalizedDataPath(sourceType));
    // Norm is not "for clean" by default, hence false; Train#Norm overrides this for tree models.
    params.put(Constants.IS_NORM_FOR_CLEAN, Boolean.FALSE.toString());

    // Statistics outputs.
    params.put(Constants.PATH_PRE_TRAINING_STATS, finder.getPreTrainingStatsPath(sourceType));
    params.put(Constants.PATH_STATS_BINNING_INFO, finder.getUpdatedBinningInfoPath(sourceType));
    params.put(Constants.PATH_STATS_PSI_INFO, finder.getPSIInfoPath(sourceType));
    params.put(Constants.WITH_SCORE, Boolean.FALSE.toString());
    params.put(Constants.STATS_SAMPLE_RATE, modelConfig.getBinningSampleRate().toString());

    // Config files and remaining data paths.
    params.put(Constants.PATH_MODEL_CONFIG, finder.getModelConfigPath(sourceType));
    params.put(Constants.PATH_COLUMN_CONFIG, finder.getColumnConfigPath(sourceType));
    params.put(Constants.PATH_SELECTED_RAW_DATA, finder.getSelectedRawDataPath(sourceType));
    params.put(Constants.PATH_BIN_AVG_SCORE, finder.getBinAvgScorePath(sourceType));
    params.put(Constants.PATH_TRAIN_SCORE, finder.getTrainScoresPath(sourceType));

    // Source type and job queue.
    params.put(Constants.SOURCE_TYPE, sourceType.toString());
    String jobQueue = Environment.getProperty(Environment.HADOOP_JOB_QUEUE, Constants.DEFAULT_JOB_QUEUE);
    params.put(Constants.JOB_QUEUE, jobQueue);

    return params;
}
Also used : PathFinder(ml.shifu.shifu.fs.PathFinder)

Example 9 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class CalculateStatsActor method onReceive.

/*
 * (non-Javadoc)
 * @see akka.actor.UntypedActor#onReceive(java.lang.Object)
 *
 * Dispatches on the message type:
 *  - AkkaActorInputMessage: resets the result counter and sends one ScanStatsRawDataMessage per
 *    scanner to dataLoadRef.
 *  - StatsResultMessage: stores the returned column config; once as many results as
 *    columnNumToActorMap has entries have arrived, writes the column config list to the column
 *    config path and shuts the actor system down.
 *  - ExceptionMessage: shuts the actor system down and records the exception.
 *  - anything else: reported as unhandled.
 */
@Override
public void onReceive(Object message) throws Exception {
    if (message instanceof AkkaActorInputMessage) {
        resultCnt = 0;
        List<Scanner> scanners = ((AkkaActorInputMessage) message).getScanners();
        int scannerCount = scanners.size();
        log.debug("Num of Scanners: " + scannerCount);
        for (Scanner scanner : scanners) {
            dataLoadRef.tell(new ScanStatsRawDataMessage(scannerCount, scanner), getSelf());
        }
        return;
    }

    if (message instanceof StatsResultMessage) {
        ColumnConfig updatedColumn = ((StatsResultMessage) message).getColumnConfig();
        columnConfigList.set(updatedColumn.getColumnNum(), updatedColumn);
        resultCnt++;
        boolean allResultsIn = resultCnt == columnNumToActorMap.size();
        if (allResultsIn) {
            log.info("Received " + resultCnt + " messages. Finished Calculating Stats.");
            File columnConfigFile = new File(new PathFinder(modelConfig).getColumnConfigPath());
            JSONUtils.writeValue(columnConfigFile, columnConfigList);
            getContext().system().shutdown();
        }
        return;
    }

    if (message instanceof ExceptionMessage) {
        // a child actor hit an exception: stop the system first ...
        getContext().system().shutdown();
        // ... then wrap the exception into the return status
        addExceptionIntoCondition(((ExceptionMessage) message).getException());
        return;
    }

    unhandled(message);
}
Also used : AkkaActorInputMessage(ml.shifu.shifu.message.AkkaActorInputMessage) Scanner(java.util.Scanner) ExceptionMessage(ml.shifu.shifu.message.ExceptionMessage) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) StatsResultMessage(ml.shifu.shifu.message.StatsResultMessage) PathFinder(ml.shifu.shifu.fs.PathFinder) File(java.io.File) ScanStatsRawDataMessage(ml.shifu.shifu.message.ScanStatsRawDataMessage)

Example 10 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class PostTrainActor method onReceive.

/*
 * (non-Javadoc)
 * @see akka.actor.UntypedActor#onReceive(java.lang.Object)
 *
 * Dispatches on the message type:
 *  - AkkaActorInputMessage: resets the result counter and sends one ScanEvalDataMessage per
 *    scanner to dataLoadRef, numbering the streams from 0.
 *  - StatsResultMessage: stores the returned column config; once expectedResultCnt results have
 *    arrived, writes the column config list to the column config path and shuts the actor
 *    system down.
 *  - ExceptionMessage: shuts the actor system down and records the exception.
 *  - anything else: reported as unhandled.
 */
@Override
public void onReceive(Object message) throws Exception {
    if (message instanceof AkkaActorInputMessage) {
        resultCnt = 0;
        List<Scanner> scanners = ((AkkaActorInputMessage) message).getScanners();
        int scannerCount = scanners.size();
        log.debug("Num of Scanners: " + scannerCount);
        int nextStreamId = 0;
        for (Scanner scanner : scanners) {
            dataLoadRef.tell(new ScanEvalDataMessage(nextStreamId, scannerCount, scanner), getSelf());
            nextStreamId++;
        }
        return;
    }

    if (message instanceof StatsResultMessage) {
        ColumnConfig updatedColumn = ((StatsResultMessage) message).getColumnConfig();
        columnConfigList.set(updatedColumn.getColumnNum(), updatedColumn);
        resultCnt++;
        log.debug("Received " + resultCnt + " messages, expected message count is:" + expectedResultCnt);
        boolean allResultsIn = resultCnt == expectedResultCnt;
        if (allResultsIn) {
            log.info("Finished post-train.");
            File columnConfigFile = new File(new PathFinder(modelConfig).getColumnConfigPath());
            JSONUtils.writeValue(columnConfigFile, columnConfigList);
            getContext().system().shutdown();
        }
        return;
    }

    if (message instanceof ExceptionMessage) {
        // a child actor hit an exception: stop the system first ...
        getContext().system().shutdown();
        // ... then wrap the exception into the return status
        addExceptionIntoCondition(((ExceptionMessage) message).getException());
        return;
    }

    unhandled(message);
}
Also used : AkkaActorInputMessage(ml.shifu.shifu.message.AkkaActorInputMessage) Scanner(java.util.Scanner) ExceptionMessage(ml.shifu.shifu.message.ExceptionMessage) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) StatsResultMessage(ml.shifu.shifu.message.StatsResultMessage) ScanEvalDataMessage(ml.shifu.shifu.message.ScanEvalDataMessage) PathFinder(ml.shifu.shifu.fs.PathFinder) File(java.io.File)

Aggregations

PathFinder (ml.shifu.shifu.fs.PathFinder)20 Path (org.apache.hadoop.fs.Path)7 FileSystem (org.apache.hadoop.fs.FileSystem)6 File (java.io.File)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig)3 ShifuException (ml.shifu.shifu.exception.ShifuException)3 JsonIgnore (com.fasterxml.jackson.annotation.JsonIgnore)2 BufferedReader (java.io.BufferedReader)2 ArrayList (java.util.ArrayList)2 Scanner (java.util.Scanner)2 ModelConfig (ml.shifu.shifu.container.obj.ModelConfig)2 AkkaActorInputMessage (ml.shifu.shifu.message.AkkaActorInputMessage)2 ExceptionMessage (ml.shifu.shifu.message.ExceptionMessage)2 StatsResultMessage (ml.shifu.shifu.message.StatsResultMessage)2 BufferedWriter (java.io.BufferedWriter)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 OutputStreamWriter (java.io.OutputStreamWriter)1 Writer (java.io.Writer)1