Search in sources :

Example 16 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class PerformanceEvaluator method review.

/**
 * Buckets the given confusion matrices into a {@link PerformanceResult} and writes that
 * result as JSON to the evaluation performance path resolved by a {@link PathFinder}.
 *
 * @param matrixList
 *            confusion matrix objects handed to {@code bucketing}
 * @param records
 *            record count handed to {@code bucketing}
 * @throws IOException
 *             if the writer cannot be opened, or if writing or closing it fails
 */
public void review(Iterable<ConfusionMatrixObject> matrixList, long records) throws IOException {
    PathFinder pathFinder = new PathFinder(modelConfig);
    // bucketing; the last flag tells bucketing whether a weight column is configured
    PerformanceResult result = bucketing(matrixList, records, evalConfig.getPerformanceBucketNum(), evalConfig.getDataSet().getWeightColumnName() != null);
    Writer writer = null;
    try {
        writer = ShifuFileUtils.getWriter(pathFinder.getEvalPerformancePath(evalConfig, evalConfig.getDataSet().getSource()), evalConfig.getDataSet().getSource());
        JSONUtils.writeValue(writer, result);
    } finally {
        // Close on every path (success and failure) so the writer is never leaked; any
        // IOException from the write is propagated to the caller instead of being swallowed.
        if (writer != null) {
            writer.close();
        }
    }
}
Also used : PerformanceResult(ml.shifu.shifu.container.obj.PerformanceResult) PathFinder(ml.shifu.shifu.fs.PathFinder) IOException(java.io.IOException) Writer(java.io.Writer)

Example 17 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class EvalConfig method getAllMetaColumns.

/**
 * Returns the meta columns of this eval set, loading and caching them on first use.
 * <p>
 * The result starts from a copy of {@link #getScoreMetaColumns(ModelConfig)}; if a meta column
 * name file is configured on the data set, the names read from it are appended (skipping names
 * already present). When no score meta columns exist, the list read from the file is used as is.
 *
 * @param modelConfig
 *            model config used to build a {@link PathFinder} when the data set source is HDFS
 * @return the cached list of meta columns, never null
 * @throws IOException
 *             if reading a column name file fails; nothing is cached in that case
 */
@JsonIgnore
public List<String> getAllMetaColumns(ModelConfig modelConfig) throws IOException {
    if (metaColumns == null) {
        synchronized (this) {
            if (metaColumns == null) {
                // Assemble the list in a local and publish it to the field only once it is
                // complete: callers that pass the unsynchronized null check above must never see
                // a half-merged list, and a failed read must not leave a partial list cached.
                List<String> merged = null;
                List<String> scoreColumns = getScoreMetaColumns(modelConfig);
                if (scoreColumns != null) {
                    merged = new ArrayList<String>(scoreColumns);
                }
                String metaColumnNameFile = dataSet.getMetaColumnNameFile();
                if (StringUtils.isNotBlank(metaColumnNameFile)) {
                    String path = metaColumnNameFile;
                    if (SourceType.HDFS.equals(dataSet.getSource())) {
                        // On HDFS only the file's base name is used, resolved under the eval set path.
                        PathFinder pathFinder = new PathFinder(modelConfig);
                        File file = new File(metaColumnNameFile);
                        path = new Path(pathFinder.getEvalSetPath(this), file.getName()).toString();
                    }
                    // Header delimiter wins; fall back to the data delimiter when it is blank.
                    String delimiter = StringUtils.isBlank(dataSet.getHeaderDelimiter()) ? dataSet.getDataDelimiter() : dataSet.getHeaderDelimiter();
                    List<String> rawMetaColumns = CommonUtils.readConfNamesAsList(path, dataSet.getSource(), delimiter);
                    if (CollectionUtils.isNotEmpty(merged)) {
                        for (String column : rawMetaColumns) {
                            if (!merged.contains(column)) {
                                merged.add(column);
                            }
                        }
                    } else {
                        merged = rawMetaColumns;
                    }
                }
                if (merged == null) {
                    merged = new ArrayList<String>();
                }
                this.metaColumns = merged;
            }
        }
    }
    return metaColumns;
}
Also used : Path(org.apache.hadoop.fs.Path) PathFinder(ml.shifu.shifu.fs.PathFinder) File(java.io.File) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore)

Example 18 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class EvalConfig method getScoreMetaColumns.

/**
 * Returns the score meta columns, reading them from the configured score meta column name
 * file the first time they are requested and caching the result afterwards.
 *
 * @param modelConfig
 *            model config used to build a {@link PathFinder} when the data set source is HDFS
 * @return the cached list of score meta columns; an empty list when none were loaded
 * @throws IOException
 *             if reading the column name file fails
 */
@JsonIgnore
public List<String> getScoreMetaColumns(ModelConfig modelConfig) throws IOException {
    if (this.scoreMetaColumns != null) {
        // Already loaded; skip the lock.
        return this.scoreMetaColumns;
    }
    synchronized (this) {
        // Checked again under the lock: another caller may have loaded the list meanwhile.
        if (this.scoreMetaColumns == null) {
            if (StringUtils.isNotBlank(scoreMetaColumnNameFile)) {
                String location = scoreMetaColumnNameFile;
                if (SourceType.HDFS.equals(dataSet.getSource())) {
                    // On HDFS only the file's base name is used, resolved under the eval set path.
                    PathFinder finder = new PathFinder(modelConfig);
                    File nameFile = new File(scoreMetaColumnNameFile);
                    Path resolved = new Path(finder.getEvalSetPath(this), nameFile.getName());
                    location = resolved.toString();
                }
                // Header delimiter wins; fall back to the data delimiter when it is blank.
                String separator;
                if (StringUtils.isBlank(dataSet.getHeaderDelimiter())) {
                    separator = dataSet.getDataDelimiter();
                } else {
                    separator = dataSet.getHeaderDelimiter();
                }
                this.scoreMetaColumns = CommonUtils.readConfNamesAsList(location, dataSet.getSource(), separator);
            }
            if (this.scoreMetaColumns == null) {
                this.scoreMetaColumns = new ArrayList<String>();
            }
        }
    }
    return this.scoreMetaColumns;
}
Also used : Path(org.apache.hadoop.fs.Path) PathFinder(ml.shifu.shifu.fs.PathFinder) File(java.io.File) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore)

Example 19 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class CommonUtils method getPigParamMap.

/**
 * Builds the parameter map handed to pig scripts: parallelism, jar path, the data, stats and
 * config paths resolved for the given source type, job queue, data set name and the escaped
 * output delimiter.
 *
 * @param modelConfig
 *            model config; must not be null
 * @param sourceType
 *            source type the paths are resolved for
 * @param pathFinder
 *            path finder instance; when null a new one is created from {@code modelConfig}
 * @return map of pig parameter names to values
 * @throws IOException
 *             any io exception
 * @throws IllegalArgumentException
 *             if modelConfig is null.
 */
public static Map<String, String> getPigParamMap(ModelConfig modelConfig, SourceType sourceType, PathFinder pathFinder) throws IOException {
    if (modelConfig == null) {
        throw new IllegalArgumentException("modelConfig should not be null.");
    }
    // Fall back to a finder derived from the model config when the caller supplied none.
    final PathFinder finder = (pathFinder != null) ? pathFinder : new PathFinder(modelConfig);

    final Map<String, String> params = new HashMap<String, String>();

    // Execution settings and jar location.
    params.put(Constants.NUM_PARALLEL, Environment.getInt(Environment.HADOOP_NUM_PARALLEL, 400).toString());
    log.info("jar path is {}", finder.getJarPath());
    params.put(Constants.PATH_JAR, finder.getJarPath());

    // Data and statistics paths.
    params.put(Constants.PATH_RAW_DATA, modelConfig.getDataSetRawPath());
    params.put(Constants.PATH_NORMALIZED_DATA, finder.getNormalizedDataPath(sourceType));
    params.put(Constants.PATH_PRE_TRAINING_STATS, finder.getPreTrainingStatsPath(sourceType));
    params.put(Constants.PATH_STATS_BINNING_INFO, finder.getUpdatedBinningInfoPath(sourceType));
    params.put(Constants.PATH_STATS_PSI_INFO, finder.getPSIInfoPath(sourceType));
    params.put(Constants.WITH_SCORE, Boolean.FALSE.toString());
    params.put(Constants.STATS_SAMPLE_RATE, modelConfig.getBinningSampleRate().toString());

    // Configuration files and derived outputs.
    params.put(Constants.PATH_MODEL_CONFIG, finder.getModelConfigPath(sourceType));
    params.put(Constants.PATH_COLUMN_CONFIG, finder.getColumnConfigPath(sourceType));
    params.put(Constants.PATH_SELECTED_RAW_DATA, finder.getSelectedRawDataPath(sourceType));
    params.put(Constants.PATH_BIN_AVG_SCORE, finder.getBinAvgScorePath(sourceType));
    params.put(Constants.PATH_TRAIN_SCORE, finder.getTrainScoresPath(sourceType));

    // Job metadata.
    params.put(Constants.SOURCE_TYPE, sourceType.toString());
    String jobQueue = Environment.getProperty(Environment.HADOOP_JOB_QUEUE, Constants.DEFAULT_JOB_QUEUE);
    params.put(Constants.JOB_QUEUE, jobQueue);
    params.put(Constants.DATASET_NAME, modelConfig.getBasic().getName());
    String outputDelimiter = Environment.getProperty(Constants.SHIFU_OUTPUT_DATA_DELIMITER, Constants.DEFAULT_DELIMITER);
    params.put(Constants.SHIFU_OUTPUT_DELIMITER, CommonUtils.escapePigString(outputDelimiter));
    return params;
}
Also used : PathFinder(ml.shifu.shifu.fs.PathFinder)

Example 20 with PathFinder

use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.

the class CommonUtils method loadModelConfig.

/**
 * Load model configuration from the path and the source type.
 * <p>
 * If the loaded config names a grid config file, that file's content is also loaded and stored
 * on the train config. For an HDFS source only the base name of the grid config file is used,
 * resolved through {@link PathFinder#getPathBySourceType}.
 *
 * @param path
 *            model file path
 * @param sourceType
 *            source type of model file
 * @return model config instance
 * @throws IOException
 *             if any IO exception in parsing json.
 *
 * @throws IllegalArgumentException
 *             if {@code path} is null or empty, if sourceType is null.
 */
public static ModelConfig loadModelConfig(String path, SourceType sourceType) throws IOException {
    ModelConfig modelConfig = loadJSON(path, sourceType, ModelConfig.class);
    if (StringUtils.isNotBlank(modelConfig.getTrain().getGridConfigFile())) {
        String gridConfigPath = modelConfig.getTrain().getGridConfigFile().trim();
        if (sourceType == SourceType.HDFS) {
            // gridsearch config file is uploaded to modelset path, so only its base name matters.
            // File#getName is used instead of searching for File.separator so that a path written
            // with '/' is still split correctly on platforms whose separator is '\'.
            String gridConfigName = new File(gridConfigPath).getName();
            gridConfigPath = new PathFinder(modelConfig).getPathBySourceType(gridConfigName, SourceType.HDFS);
        }
        // Only load file content. Grid search params parsing is done in {@link GridSearch} initialization.
        modelConfig.getTrain().setGridConfigFileContent(loadFileContent(gridConfigPath, sourceType));
    }
    return modelConfig;
}
Also used : ModelConfig(ml.shifu.shifu.container.obj.ModelConfig) PathFinder(ml.shifu.shifu.fs.PathFinder)

Aggregations

PathFinder (ml.shifu.shifu.fs.PathFinder)20 Path (org.apache.hadoop.fs.Path)7 FileSystem (org.apache.hadoop.fs.FileSystem)6 File (java.io.File)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig)3 ShifuException (ml.shifu.shifu.exception.ShifuException)3 JsonIgnore (com.fasterxml.jackson.annotation.JsonIgnore)2 BufferedReader (java.io.BufferedReader)2 ArrayList (java.util.ArrayList)2 Scanner (java.util.Scanner)2 ModelConfig (ml.shifu.shifu.container.obj.ModelConfig)2 AkkaActorInputMessage (ml.shifu.shifu.message.AkkaActorInputMessage)2 ExceptionMessage (ml.shifu.shifu.message.ExceptionMessage)2 StatsResultMessage (ml.shifu.shifu.message.StatsResultMessage)2 BufferedWriter (java.io.BufferedWriter)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 OutputStreamWriter (java.io.OutputStreamWriter)1 Writer (java.io.Writer)1