Use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
In the class ModelSpecLoaderUtils, method loadGenericModels.
/**
 * Load generic models from local or HDFS storage and initialize them.
 * <p>
 * When the local {@code <user.dir>/models} folder does not exist, the models folder resolved by
 * {@code PathFinder.getModelsPath(sourceType)} is copied into {@code <user.dir>} first. Every generic model
 * config is then loaded, pointed at the local models folder through {@code Constants.GENERIC_MODEL_PATH} and
 * wrapped into a {@code GenericModel} that is appended to {@code models}. Only the TensorFlow algorithm is
 * supported.
 *
 * @param modelConfig
 *            model config
 * @param genericModelConfigs
 *            generic model config files
 * @param sourceType
 *            source type
 * @param models
 *            models list to have the result
 * @throws IOException
 *             Exception when fail to load generic models
 * @throws RuntimeException
 *             if the configured algorithm is not supported or the model evaluator cannot be initialized
 */
public static void loadGenericModels(ModelConfig modelConfig, List<FileStatus> genericModelConfigs, SourceType sourceType, List<BasicML> models) throws IOException {
    FileSystem hdfs = HDFSUtils.getFS();
    PathFinder pathFinder = new PathFinder(modelConfig);
    String src = pathFinder.getModelsPath(sourceType);

    // <user.dir>/models: computed once and shared by the existence check and by every model config, so both
    // always refer to the same folder.
    String userDir = System.getProperty(Constants.USER_DIR);
    String genericModelPath = userDir + File.separator + Constants.MODELS;

    // check if local model dir exists, if not fetch it into <user.dir>
    // NOTE(review): the copy always goes through the HDFS file system, whatever sourceType is - confirm that
    // this is intended for local source type.
    if (!new File(genericModelPath).exists()) {
        hdfs.copyToLocalFile(false, new Path(src), new Path(userDir), true);
    }

    for (FileStatus fst : genericModelConfigs) {
        // loading as GenericModelConfig
        GenericModelConfig gmc = CommonUtils.loadJSON(fst.getPath().toString(), sourceType,
                GenericModelConfig.class);
        String alg = (String) gmc.getProperties().get(Constants.GENERIC_ALGORITHM);
        gmc.getProperties().put(Constants.GENERIC_MODEL_PATH, genericModelPath);
        log.info("Generic model path is : {}.", gmc.getProperties().get(Constants.GENERIC_MODEL_PATH));

        if (!Constants.TENSORFLOW.equals(alg)) {
            throw new RuntimeException("Algorithm: " + alg + " is not supported in generic model yet.");
        }

        try {
            // Initiate an evaluator class instance which is used for evaluation. The no-arg constructor is
            // invoked explicitly because Class#newInstance is deprecated and leaks checked exceptions.
            Class<?> clazz = Class.forName(ComputeImplClass.Tensorflow.getClassName());
            Computable computable = (Computable) clazz.getDeclaredConstructor().newInstance();
            computable.init(gmc);
            models.add(new GenericModel(computable, gmc.getProperties()));
        } catch (Exception e) {
            throw new RuntimeException("Fail to initialize generic model from " + fst.getPath(), e);
        }
    }
}
Use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
In the class CommonUtils, method copyEvalDataFromLocalToHDFS.
/**
 * Sync-up the evaluation data of one eval set into HDFS.
 * <p>
 * The local eval set folder is copied only when it exists, is a directory and the HDFS target does not exist
 * yet. The score meta column file and the data set meta column file are copied whenever they are configured.
 * Nothing is done when no eval config is found for {@code evalName}.
 *
 * @param modelConfig
 *            - ModelConfig
 * @param evalName
 *            eval name in ModelConfig
 * @throws IOException
 *             - error occur when copying data
 */
@SuppressWarnings("deprecation")
public static void copyEvalDataFromLocalToHDFS(ModelConfig modelConfig, String evalName) throws IOException {
    EvalConfig evalConfig = modelConfig.getEvalConfigByName(evalName);
    if (evalConfig == null) {
        // no such eval set, nothing to sync
        return;
    }

    FileSystem hdfs = HDFSUtils.getFS();
    FileSystem localFs = HDFSUtils.getLocalFS();
    PathFinder pathFinder = new PathFinder(modelConfig);

    Path localEvalDir = new Path(pathFinder.getEvalSetPath(evalConfig, SourceType.LOCAL));
    Path hdfsEvalDir = new Path(pathFinder.getEvalSetPath(evalConfig, SourceType.HDFS));

    // local evaluation folder must exist and be a directory
    boolean hasLocalEvalDir = localFs.exists(localEvalDir) && localFs.getFileStatus(localEvalDir).isDir();
    if (hasLocalEvalDir && !hdfs.exists(hdfsEvalDir)) {
        hdfs.copyFromLocalFile(localEvalDir, hdfsEvalDir);
    }

    // sync score meta column file to hdfs
    String scoreMetaFile = evalConfig.getScoreMetaColumnNameFile();
    if (StringUtils.isNotBlank(scoreMetaFile)) {
        Path evalSetPath = new Path(pathFinder.getEvalSetPath(evalConfig));
        hdfs.copyFromLocalFile(new Path(scoreMetaFile), evalSetPath);
    }

    // sync evaluation meta.column.file to hdfs
    String dataMetaFile = evalConfig.getDataSet().getMetaColumnNameFile();
    if (StringUtils.isNotBlank(dataMetaFile)) {
        Path evalSetPath = new Path(pathFinder.getEvalSetPath(evalConfig));
        hdfs.copyFromLocalFile(new Path(dataMetaFile), evalSetPath);
    }
}
Use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
In the class CommonUtils, method getPigParamMap.
/**
 * Build the parameter map handed over to pig scripts.
 * <p>
 * The map carries the parallel number (400 unless {@code Environment.HADOOP_NUM_PARALLEL} is set), the jar
 * path, the data/stats/config paths resolved by {@code PathFinder} for the given source type, a few default
 * flags and the hadoop job queue.
 *
 * @param modelConfig
 *            model config
 * @param sourceType
 *            source type
 * @return map of configurations
 * @throws IOException
 *             any io exception
 * @throws IllegalArgumentException
 *             if modelConfig is null.
 */
public static Map<String, String> getPigParamMap(ModelConfig modelConfig, SourceType sourceType) throws IOException {
    if (modelConfig == null) {
        throw new IllegalArgumentException("modelConfig should not be null.");
    }

    PathFinder finder = new PathFinder(modelConfig);
    Map<String, String> params = new HashMap<String, String>();

    // execution settings
    params.put(Constants.NUM_PARALLEL, Environment.getInt(Environment.HADOOP_NUM_PARALLEL, 400).toString());
    log.info("jar path is {}", finder.getJarPath());
    params.put(Constants.PATH_JAR, finder.getJarPath());

    // raw and normalized data
    params.put(Constants.PATH_RAW_DATA, modelConfig.getDataSetRawPath());
    params.put(Constants.PATH_NORMALIZED_DATA, finder.getNormalizedDataPath(sourceType));
    // norm is not for clean by default; Train#Norm overrides this flag for tree models
    params.put(Constants.IS_NORM_FOR_CLEAN, Boolean.FALSE.toString());

    // stats outputs
    params.put(Constants.PATH_PRE_TRAINING_STATS, finder.getPreTrainingStatsPath(sourceType));
    params.put(Constants.PATH_STATS_BINNING_INFO, finder.getUpdatedBinningInfoPath(sourceType));
    params.put(Constants.PATH_STATS_PSI_INFO, finder.getPSIInfoPath(sourceType));
    params.put(Constants.WITH_SCORE, Boolean.FALSE.toString());
    params.put(Constants.STATS_SAMPLE_RATE, modelConfig.getBinningSampleRate().toString());

    // configuration files
    params.put(Constants.PATH_MODEL_CONFIG, finder.getModelConfigPath(sourceType));
    params.put(Constants.PATH_COLUMN_CONFIG, finder.getColumnConfigPath(sourceType));

    // selected data and score outputs
    params.put(Constants.PATH_SELECTED_RAW_DATA, finder.getSelectedRawDataPath(sourceType));
    params.put(Constants.PATH_BIN_AVG_SCORE, finder.getBinAvgScorePath(sourceType));
    params.put(Constants.PATH_TRAIN_SCORE, finder.getTrainScoresPath(sourceType));

    // job level settings
    params.put(Constants.SOURCE_TYPE, sourceType.toString());
    params.put(Constants.JOB_QUEUE,
            Environment.getProperty(Environment.HADOOP_JOB_QUEUE, Constants.DEFAULT_JOB_QUEUE));

    return params;
}
Use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
In the class CalculateStatsActor, method onReceive.
/*
 * Dispatches on the message type: an input message fans the scanners out to the data loader, a stats result
 * is stored into the column config list (and the list is written out once all results arrived), an exception
 * message shuts the actor system down. Any other message is reported as unhandled.
 *
 * (non-Javadoc)
 * @see akka.actor.UntypedActor#onReceive(java.lang.Object)
 */
@Override
public void onReceive(Object message) throws Exception {
    if (message instanceof AkkaActorInputMessage) {
        // new input: reset the result counter and hand every scanner to the data loader
        resultCnt = 0;
        AkkaActorInputMessage inputMsg = (AkkaActorInputMessage) message;
        List<Scanner> scannerList = inputMsg.getScanners();
        int scannerCount = scannerList.size();
        log.debug("Num of Scanners: " + scannerCount);
        for (Scanner each : scannerList) {
            dataLoadRef.tell(new ScanStatsRawDataMessage(scannerCount, each), getSelf());
        }
        return;
    }

    if (message instanceof StatsResultMessage) {
        // keep the computed column config at its column index
        ColumnConfig updated = ((StatsResultMessage) message).getColumnConfig();
        columnConfigList.set(updated.getColumnNum(), updated);
        resultCnt++;
        if (resultCnt == columnNumToActorMap.size()) {
            // one result per mapped column received: persist column configs and stop
            log.info("Received " + resultCnt + " messages. Finished Calculating Stats.");
            File columnConfigFile = new File(new PathFinder(modelConfig).getColumnConfigPath());
            JSONUtils.writeValue(columnConfigFile, columnConfigList);
            getContext().system().shutdown();
        }
        return;
    }

    if (message instanceof ExceptionMessage) {
        // some child actor met an exception: shut the system down first,
        // then wrap the exception into the return status
        ExceptionMessage failure = (ExceptionMessage) message;
        getContext().system().shutdown();
        addExceptionIntoCondition(failure.getException());
        return;
    }

    unhandled(message);
}
Use of ml.shifu.shifu.fs.PathFinder in project shifu by ShifuML.
In the class PostTrainActor, method onReceive.
/*
 * Dispatches on the message type: an input message fans the scanners out to the data loader (each with its
 * own stream id), a stats result is stored into the column config list (and the list is written out once the
 * expected number of results arrived), an exception message shuts the actor system down. Any other message
 * is reported as unhandled.
 *
 * (non-Javadoc)
 * @see akka.actor.UntypedActor#onReceive(java.lang.Object)
 */
@Override
public void onReceive(Object message) throws Exception {
    if (message instanceof AkkaActorInputMessage) {
        // new input: reset the result counter and hand every scanner to the data loader
        resultCnt = 0;
        AkkaActorInputMessage inputMsg = (AkkaActorInputMessage) message;
        List<Scanner> scannerList = inputMsg.getScanners();
        int scannerCount = scannerList.size();
        log.debug("Num of Scanners: " + scannerCount);
        // stream ids are assigned in iteration order, starting from 0
        int nextStreamId = 0;
        for (Scanner each : scannerList) {
            dataLoadRef.tell(new ScanEvalDataMessage(nextStreamId, scannerCount, each), getSelf());
            nextStreamId++;
        }
        return;
    }

    if (message instanceof StatsResultMessage) {
        // keep the computed column config at its column index
        ColumnConfig updated = ((StatsResultMessage) message).getColumnConfig();
        columnConfigList.set(updated.getColumnNum(), updated);
        resultCnt++;
        log.debug("Received " + resultCnt + " messages, expected message count is:" + expectedResultCnt);
        if (resultCnt == expectedResultCnt) {
            // all expected results received: persist column configs and stop
            log.info("Finished post-train.");
            File columnConfigFile = new File(new PathFinder(modelConfig).getColumnConfigPath());
            JSONUtils.writeValue(columnConfigFile, columnConfigList);
            getContext().system().shutdown();
        }
        return;
    }

    if (message instanceof ExceptionMessage) {
        // some child actor met an exception: shut the system down first,
        // then wrap the exception into the return status
        ExceptionMessage failure = (ExceptionMessage) message;
        getContext().system().shutdown();
        addExceptionIntoCondition(failure.getException());
        return;
    }

    unhandled(message);
}
Aggregations