use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
the class Step method validateModelConfig.
/**
 * Checks whether the given model configuration is well written for the requested step.
 * <p>
 * A {@code null} configuration is reported with the cause "The ModelConfig is not loaded!";
 * any other configuration is probed through {@code ModelInspector}. When the outcome is
 * negative every cause is logged as an error and the method aborts with an exception. When the
 * outcome is positive the algorithm parameters are checked via {@code checkAlgParameter}.
 *
 * @param modelConfig
 *            the model config, may be {@code null}
 * @param step
 *            step in Shifu
 * @throws Exception
 *             any exception in validation; in particular a {@code ShifuException} carrying
 *             {@code ShifuErrorCode.ERROR_MODELCONFIG_NOT_VALIDATION} when validation fails
 */
protected void validateModelConfig(ModelConfig modelConfig, ModelStep step) throws Exception {
    ValidateResult validation;
    if (modelConfig != null) {
        validation = ModelInspector.getInspector().probe(modelConfig, step);
    } else {
        // Nothing to probe: build a failed result that explains why.
        validation = new ValidateResult(false);
        validation.getCauses().add("The ModelConfig is not loaded!");
    }

    if (validation.getStatus()) {
        LOG.info("ModelConfig Validation - OK");
        checkAlgParameter(modelConfig);
        return;
    }

    // Report every individual cause before giving up.
    LOG.error("ModelConfig Validation - Fail! See below:");
    for (String reason : validation.getCauses()) {
        LOG.error("\t!!! " + reason);
    }
    throw new ShifuException(ShifuErrorCode.ERROR_MODELCONFIG_NOT_VALIDATION);
}
use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
the class ModelDataEncodeProcessor method encodeModelData.
/**
 * Runs the {@code scripts/EncodeData.pig} job for the training data set (when
 * {@code evalConfig} is {@code null}) or for the given evaluation set, then inspects the Pig job
 * counters to detect a suspicious amount of invalid tags.
 * <p>
 * The previous encode output is deleted before the job is submitted. After the job finishes,
 * the job graph is scanned for the job that carries the Shifu counter group with a non-zero
 * {@code TOTAL_VALID_COUNT}; jobs without Hadoop counters, without that group, or with a zero
 * valid count are skipped. Scanning stops at the first job that qualifies.
 *
 * @param evalConfig
 *            the evaluation set to encode, or {@code null} to encode the training data
 * @return {@code 1} when the ratio of {@code INVALID_TAG} to {@code TOTAL_VALID_COUNT} is at
 *         least 0.8, otherwise {@code 0}
 * @throws IOException
 *             declared for I/O failures while cleaning up the output path
 * @throws ShifuException
 *             with {@code ShifuErrorCode.ERROR_RUNNING_PIG_JOB} when the Pig job fails with an
 *             {@code IOException}
 * @throws RuntimeException
 *             wrapping any other {@code Throwable} raised while running the job
 */
@SuppressWarnings("deprecation")
private int encodeModelData(EvalConfig evalConfig) throws IOException {
    int status = 0;
    RawSourceData.SourceType sourceType = this.modelConfig.getDataSet().getSource();

    // clean up output directories
    ShifuFileUtils.deleteFile(pathFinder.getEncodeDataPath(evalConfig), sourceType);

    // prepare special parameters and execute pig
    Map<String, String> paramsMap = new HashMap<String, String>();
    paramsMap.put(Constants.SOURCE_TYPE, sourceType.toString());
    paramsMap.put("pathRawData",
            (evalConfig == null) ? modelConfig.getDataSetRawPath() : evalConfig.getDataSet().getDataPath());
    paramsMap.put("pathEncodeData", pathFinder.getEncodeDataPath(evalConfig));
    paramsMap.put("delimiter", CommonUtils.escapePigString(modelConfig.getDataSetDelimiter()));
    paramsMap.put("evalSetName", (evalConfig == null ? TRAINING_DATA_SET : evalConfig.getName()));
    paramsMap.put(Constants.IS_COMPRESS, "true");

    try {
        String encodePigPath = pathFinder.getScriptPath("scripts/EncodeData.pig");
        PigExecutor.getExecutor().submitJob(modelConfig, encodePigPath, paramsMap);

        Iterator<JobStats> iter = PigStats.get().getJobGraph().iterator();
        while (iter.hasNext()) {
            JobStats jobStats = iter.next();

            // A job without the Shifu counter group cannot be the one we are looking for; keep
            // scanning instead of giving up on the remaining jobs.
            if (jobStats.getHadoopCounters() == null
                    || jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER) == null) {
                continue;
            }

            long totalValidCount = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER)
                    .getCounter("TOTAL_VALID_COUNT");
            // If no basic record counter, check next one
            if (totalValidCount == 0L) {
                continue;
            }

            long invalidTagCount = jobStats.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER)
                    .getCounter("INVALID_TAG");
            LOG.info("Total valid records {} after filtering, invalid tag records {}.", totalValidCount,
                    invalidTagCount);
            // "* 1d" forces floating-point division so the ratio is not truncated to 0.
            if (totalValidCount > 0L && invalidTagCount * 1d / totalValidCount >= 0.8d) {
                LOG.error("Too many invalid tags, please check you configuration on positive tags and negative tags.");
                status = 1;
            }

            // only one pig job with such counters, break
            break;
        }
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, e);
    } catch (Throwable e) {
        throw new RuntimeException(e);
    }
    return status;
}
use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
the class BasicModelProcessor method runDataClean.
/**
 * Generates the cleaned data set used for tree models by running
 * {@code scripts/Normalize.pig} in "norm for clean" mode, and optionally shuffles the result.
 * <p>
 * Any existing cleaned data is deleted first. When a validation data path is configured, the
 * same script is run a second time (uncompressed) to produce the cleaned validation data.
 *
 * @param isToShuffle
 *            whether to run a {@code MapReduceShuffle} over the cleaned data afterwards
 * @throws IOException
 *             on I/O failures while checking or deleting existing output, or while shuffling
 * @throws ShifuException
 *             with {@code ShifuErrorCode.ERROR_RUNNING_PIG_JOB} when a Pig job fails with an
 *             {@code IOException}
 * @throws RuntimeException
 *             wrapping any other {@code Throwable} from the Pig jobs, or a failure of the
 *             shuffle step
 */
protected void runDataClean(boolean isToShuffle) throws IOException {
    SourceType sourceType = modelConfig.getDataSet().getSource();
    String cleanedDataPath = this.pathFinder.getCleanedDataPath();

    LOG.info("Start to generate clean data for tree model ... ");
    if (ShifuFileUtils.isFileExists(cleanedDataPath, sourceType)) {
        ShifuFileUtils.deleteFile(cleanedDataPath, sourceType);
    }

    Map<String, String> paramsMap = new HashMap<String, String>();
    paramsMap.put("sampleRate", modelConfig.getNormalizeSampleRate().toString());
    paramsMap.put("sampleNegOnly", ((Boolean) modelConfig.isNormalizeSampleNegOnly()).toString());
    paramsMap.put("delimiter", CommonUtils.escapePigString(modelConfig.getDataSetDelimiter()));
    paramsMap.put("is_csv", String.valueOf(Boolean.TRUE.toString().equalsIgnoreCase(
            Environment.getProperty(Constants.SHIFU_OUTPUT_DATA_CSV, Boolean.FALSE.toString()))));

    try {
        String normPigPath = pathFinder.getScriptPath("scripts/Normalize.pig");
        paramsMap.put(Constants.IS_COMPRESS, "true");
        paramsMap.put(Constants.IS_NORM_FOR_CLEAN, "true");
        paramsMap.put(Constants.PATH_NORMALIZED_DATA, cleanedDataPath);
        PigExecutor.getExecutor().submitJob(modelConfig, normPigPath, paramsMap, sourceType, this.pathFinder);

        // cleaned validation data
        if (StringUtils.isNotBlank(modelConfig.getValidationDataSetRawPath())) {
            String cleanedValidationDataPath = pathFinder.getCleanedValidationDataPath();
            if (ShifuFileUtils.isFileExists(cleanedValidationDataPath, sourceType)) {
                ShifuFileUtils.deleteFile(cleanedValidationDataPath, sourceType);
            }
            // The same parameter map is reused; only the entries that differ for the
            // validation run are overwritten.
            paramsMap.put(Constants.IS_COMPRESS, "false");
            paramsMap.put(Constants.PATH_RAW_DATA, modelConfig.getValidationDataSetRawPath());
            paramsMap.put(Constants.PATH_NORMALIZED_DATA, cleanedValidationDataPath);
            PigExecutor.getExecutor().submitJob(modelConfig, normPigPath, paramsMap, sourceType,
                    this.pathFinder);
        }
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, e);
    } catch (Throwable e) {
        throw new RuntimeException(e);
    }

    if (isToShuffle) {
        MapReduceShuffle shuffler = new MapReduceShuffle(this.modelConfig);
        try {
            shuffler.run(cleanedDataPath);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Fail to shuffle the cleaned data.", e);
        } catch (InterruptedException e) {
            // Re-assert the interrupt so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException("Fail to shuffle the cleaned data.", e);
        }
    }
    LOG.info("Generate clean data for tree model successful.");
}
use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
the class BasicModelProcessor method copyModelFiles.
/**
 * Loads {@code ModelConfig.json} from the source folder, renames the model set after the target
 * folder, sets the creator from the {@code Environment.SYSTEM_USER} property, and writes the
 * adjusted configuration as {@code ModelConfig.json} into the target folder.
 *
 * @param sourcePath
 *            folder containing the {@code ModelConfig.json} to copy from
 * @param targetPath
 *            folder to write the adjusted {@code ModelConfig.json} into; its last path element
 *            becomes the new model set name
 * @throws IOException
 *             declared for failures while loading the source configuration
 * @throws ShifuException
 *             with {@code ShifuErrorCode.ERROR_WRITE_MODELCONFIG} when writing the target file
 *             fails with an {@code IOException}
 */
public void copyModelFiles(String sourcePath, String targetPath) throws IOException {
    String sourceConfigPath = sourcePath + File.separator + "ModelConfig.json";
    loadModelConfig(sourceConfigPath, SourceType.LOCAL);

    // The new model set is named after the target folder.
    String newModelSetName = new File(targetPath).getName();
    this.modelConfig.setModelSetName(newModelSetName);
    this.modelConfig.setModelSetCreator(Environment.getProperty(Environment.SYSTEM_USER));

    File targetConfigFile = new File(targetPath + File.separator + "ModelConfig.json");
    try {
        JSONUtils.writeValue(targetConfigFile, modelConfig);
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_WRITE_MODELCONFIG, e);
    }
}
use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
the class CreateModelProcessor method run.
/**
 * Runner for the create model processor: creates the model set folder named {@code name},
 * writes an initial {@code ModelConfig.json} into it and calls {@code createHead} on the
 * folder.
 * <p>
 * The initial configuration is created in distributed mode when
 * {@code HDFSUtils.isDistributedMode()} reports {@code true}, otherwise in local mode.
 *
 * @return {@code 1} when the model set folder already exists, {@code -1} when any exception is
 *         raised while creating the model set, {@code 0} on success
 * @throws IOException
 *             - when creating files
 */
@Override
public int run() throws IOException {
    File folder = new File(name);
    if (folder.exists()) {
        log.error("ModelSet - {} already exists.", name);
        return 1;
    }

    try {
        log.info("Creating ModelSet Folder: " + folder.getCanonicalPath() + "...");
        FileUtils.forceMkdir(folder);

        log.info("Creating Initial ModelConfig.json ...");
        // how to check hdfs
        boolean hadoopDetected = HDFSUtils.isDistributedMode();
        log.info(hadoopDetected
                ? "Enable DIST/MAPRED mode because Hadoop cluster is detected."
                : "Enable LOCAL mode because Hadoop cluster is not detected.");

        modelConfig = ModelConfig.createInitModelConfig(name, alg, description, hadoopDetected);

        // Resolve the folder location once and use it for both the config file and the head.
        String folderPath = folder.getCanonicalPath();
        JSONUtils.writeValue(new File(folderPath + File.separator + "ModelConfig.json"), modelConfig);
        createHead(folderPath);

        log.info("Step Finished: new");
        return 0;
    } catch (ShifuException e) {
        log.error("Error:" + e.getError().toString() + "; msg:" + e.getMessage(), e);
        return -1;
    } catch (Exception e) {
        log.error("Error:" + e.getMessage(), e);
        return -1;
    }
}
Aggregations