use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class PerformanceEvaluator method review.
/**
 * Buckets the given confusion matrices into a {@code PerformanceResult} and writes that result as
 * JSON to the eval performance path resolved for the eval data set's source type.
 *
 * @param matrixList
 *            confusion matrix objects handed to {@code bucketing}
 * @param records
 *            record count handed to {@code bucketing}
 * @throws IOException
 *             if the writer cannot be opened, the result cannot be written, or the writer cannot be
 *             closed
 */
public void review(Iterable<ConfusionMatrixObject> matrixList, long records) throws IOException {
    PathFinder pathFinder = new PathFinder(modelConfig);
    // bucketing; the last flag tells whether a weight column is configured for the eval data set
    PerformanceResult result = bucketing(matrixList, records, evalConfig.getPerformanceBucketNum(),
            evalConfig.getDataSet().getWeightColumnName() != null);
    Writer writer = null;
    try {
        writer = ShifuFileUtils.getWriter(
                pathFinder.getEvalPerformancePath(evalConfig, evalConfig.getDataSet().getSource()),
                evalConfig.getDataSet().getSource());
        JSONUtils.writeValue(writer, result);
    } finally {
        // Close on success as well as on failure; previously the writer was only closed when an
        // IOException happened, and that exception was silently dropped.
        if (writer != null) {
            writer.close();
        }
    }
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class EvalConfig method getAllMetaColumns.
/**
 * Returns the cached list of all meta columns, building it on first use.
 *
 * <p>
 * The list starts as a copy of the score meta columns. If a meta column name file is configured on
 * the data set, its column names are read and appended (skipping names already present). When the
 * data set source is HDFS the file is looked up by its simple name under the eval set path.
 *
 * <p>
 * The list is assembled in a local variable and assigned to the field only once it is complete, so
 * a failed read does not leave a partially built list cached for later calls.
 * NOTE(review): safe publication to threads that skip the lock still depends on how the
 * {@code metaColumns} field is declared, which is not visible here.
 *
 * @param modelConfig
 *            model config used to resolve paths
 * @return the meta column names, never {@code null}
 * @throws IOException
 *             if reading a column name file fails
 */
@JsonIgnore
public List<String> getAllMetaColumns(ModelConfig modelConfig) throws IOException {
    if (metaColumns == null) {
        synchronized (this) {
            if (metaColumns == null) {
                List<String> merged = null;
                List<String> scoreColumns = getScoreMetaColumns(modelConfig);
                if (scoreColumns != null) {
                    merged = new ArrayList<String>(scoreColumns);
                }
                String metaColumnNameFile = dataSet.getMetaColumnNameFile();
                if (StringUtils.isNotBlank(metaColumnNameFile)) {
                    String path = metaColumnNameFile;
                    if (SourceType.HDFS.equals(dataSet.getSource())) {
                        PathFinder pathFinder = new PathFinder(modelConfig);
                        File file = new File(metaColumnNameFile);
                        path = new Path(pathFinder.getEvalSetPath(this), file.getName()).toString();
                    }
                    String delimiter = StringUtils.isBlank(dataSet.getHeaderDelimiter())
                            ? dataSet.getDataDelimiter()
                            : dataSet.getHeaderDelimiter();
                    List<String> rawMetaColumns = CommonUtils.readConfNamesAsList(path, dataSet.getSource(),
                            delimiter);
                    if (CollectionUtils.isNotEmpty(merged)) {
                        // tolerate a null read result here just like the else branch does
                        if (rawMetaColumns != null) {
                            for (String column : rawMetaColumns) {
                                if (!merged.contains(column)) {
                                    merged.add(column);
                                }
                            }
                        }
                    } else {
                        merged = rawMetaColumns;
                    }
                }
                if (merged == null) {
                    merged = new ArrayList<String>();
                }
                // publish only the fully built list
                this.metaColumns = merged;
            }
        }
    }
    return metaColumns;
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class EvalConfig method getScoreMetaColumns.
/**
 * Returns the cached score meta column names, loading them on first use.
 *
 * <p>
 * When a score meta column name file is configured, its names are read using the header delimiter
 * (or the data delimiter if the header delimiter is blank). For an HDFS data set the file is looked
 * up by its simple name under the eval set path. Without a configured file, or when nothing is
 * read, an empty list is cached.
 *
 * @param modelConfig
 *            model config used to resolve paths
 * @return the score meta column names, never {@code null}
 * @throws IOException
 *             if reading the column name file fails
 */
@JsonIgnore
public List<String> getScoreMetaColumns(ModelConfig modelConfig) throws IOException {
    if (scoreMetaColumns != null) {
        return scoreMetaColumns;
    }
    synchronized (this) {
        if (scoreMetaColumns == null) {
            List<String> loaded = null;
            if (!StringUtils.isBlank(scoreMetaColumnNameFile)) {
                String location = scoreMetaColumnNameFile;
                if (SourceType.HDFS.equals(dataSet.getSource())) {
                    String simpleName = new File(scoreMetaColumnNameFile).getName();
                    PathFinder finder = new PathFinder(modelConfig);
                    location = new Path(finder.getEvalSetPath(this), simpleName).toString();
                }
                String separator = dataSet.getHeaderDelimiter();
                if (StringUtils.isBlank(separator)) {
                    separator = dataSet.getDataDelimiter();
                }
                loaded = CommonUtils.readConfNamesAsList(location, dataSet.getSource(), separator);
            }
            if (loaded == null) {
                loaded = new ArrayList<String>();
            }
            this.scoreMetaColumns = loaded;
        }
    }
    return scoreMetaColumns;
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class CommonUtils method getPigParamMap.
/**
 * Builds the parameter map handed to pig scripts: parallelism, jar location, data and config
 * paths resolved for the given source type, job queue, data set name and output delimiter.
 *
 * @param modelConfig
 *            model config; must not be null
 * @param sourceType
 *            source type the paths are resolved for
 * @param pathFinder
 *            path finder to use; when null a new one is created from {@code modelConfig}
 * @return a new mutable map of pig parameter names to values
 * @throws IOException
 *             any io exception
 * @throws IllegalArgumentException
 *             if modelConfig is null.
 */
public static Map<String, String> getPigParamMap(ModelConfig modelConfig, SourceType sourceType, PathFinder pathFinder) throws IOException {
    if (modelConfig == null) {
        throw new IllegalArgumentException("modelConfig should not be null.");
    }
    final PathFinder finder = (pathFinder != null) ? pathFinder : new PathFinder(modelConfig);

    Map<String, String> params = new HashMap<String, String>();

    // execution settings and jar location
    params.put(Constants.NUM_PARALLEL, Environment.getInt(Environment.HADOOP_NUM_PARALLEL, 400).toString());
    log.info("jar path is {}", finder.getJarPath());
    params.put(Constants.PATH_JAR, finder.getJarPath());

    // data and statistics paths
    params.put(Constants.PATH_RAW_DATA, modelConfig.getDataSetRawPath());
    params.put(Constants.PATH_NORMALIZED_DATA, finder.getNormalizedDataPath(sourceType));
    params.put(Constants.PATH_PRE_TRAINING_STATS, finder.getPreTrainingStatsPath(sourceType));
    params.put(Constants.PATH_STATS_BINNING_INFO, finder.getUpdatedBinningInfoPath(sourceType));
    params.put(Constants.PATH_STATS_PSI_INFO, finder.getPSIInfoPath(sourceType));
    params.put(Constants.WITH_SCORE, Boolean.toString(false));
    params.put(Constants.STATS_SAMPLE_RATE, modelConfig.getBinningSampleRate().toString());

    // configuration files and score outputs
    params.put(Constants.PATH_MODEL_CONFIG, finder.getModelConfigPath(sourceType));
    params.put(Constants.PATH_COLUMN_CONFIG, finder.getColumnConfigPath(sourceType));
    params.put(Constants.PATH_SELECTED_RAW_DATA, finder.getSelectedRawDataPath(sourceType));
    params.put(Constants.PATH_BIN_AVG_SCORE, finder.getBinAvgScorePath(sourceType));
    params.put(Constants.PATH_TRAIN_SCORE, finder.getTrainScoresPath(sourceType));

    // job level settings
    params.put(Constants.SOURCE_TYPE, sourceType.toString());
    String jobQueue = Environment.getProperty(Environment.HADOOP_JOB_QUEUE, Constants.DEFAULT_JOB_QUEUE);
    params.put(Constants.JOB_QUEUE, jobQueue);
    params.put(Constants.DATASET_NAME, modelConfig.getBasic().getName());
    String outputDelimiter = Environment.getProperty(Constants.SHIFU_OUTPUT_DATA_DELIMITER, Constants.DEFAULT_DELIMITER);
    params.put(Constants.SHIFU_OUTPUT_DELIMITER, CommonUtils.escapePigString(outputDelimiter));

    return params;
}
use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
the class CommonUtils method loadModelConfig.
/**
 * Load model configuration from the path and the source type.
 *
 * <p>
 * If the train section names a grid config file, the content of that file is loaded as well and
 * stored on the train config. For HDFS only the simple file name of the configured path is used,
 * resolved through {@link PathFinder#getPathBySourceType}.
 *
 * @param path
 *            model file path
 * @param sourceType
 *            source type of model file
 * @return model config instance
 * @throws IOException
 *             if any IO exception in parsing json.
 *
 * @throws IllegalArgumentException
 *             if {@code path} is null or empty, if sourceType is null (NOTE(review): presumably
 *             raised by {@code loadJSON}; this method performs no such check itself).
 */
public static ModelConfig loadModelConfig(String path, SourceType sourceType) throws IOException {
    ModelConfig modelConfig = loadJSON(path, sourceType, ModelConfig.class);
    if (StringUtils.isNotBlank(modelConfig.getTrain().getGridConfigFile())) {
        String gridConfigPath = modelConfig.getTrain().getGridConfigFile().trim();
        if (sourceType == SourceType.HDFS) {
            // gridsearch config file is uploaded to modelset path
            // File#getName extracts the last path element the same way the meta column name files
            // are handled elsewhere, instead of searching for File.separator by hand.
            String gridConfigFileName = new File(gridConfigPath).getName();
            gridConfigPath = new PathFinder(modelConfig).getPathBySourceType(gridConfigFileName, SourceType.HDFS);
        }
        // Only load file content. Grid search params parsing is done in {@link GridSearch} initialization.
        modelConfig.getTrain().setGridConfigFileContent(loadFileContent(gridConfigPath, sourceType));
    }
    return modelConfig;
}
Aggregations