use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
the class NormStep method process.
/**
 * Runs the normalization step.
 *
 * <p>The method performs, in order:
 * <ol>
 * <li>writes the current ModelConfig and ColumnConfig as JSON to their local paths and, when the
 * data set source is HDFS, calls {@code CommonUtils.copyConfFromLocalToHDFS};</li>
 * <li>deletes the previous normalized data, normalized validation data and selected raw data
 * paths;</li>
 * <li>submits the normalization Pig script for the training data;</li>
 * <li>if a validation raw data path is configured (non-blank), submits the same script again with
 * the raw/normalized data paths overridden and compression switched off.</li>
 * </ol>
 *
 * @return the {@code columnConfigList} field of this step
 * @throws IOException
 *             propagated from the config writing / file deletion calls made before the Pig jobs
 * @throws ShifuException
 *             with {@code ERROR_RUNNING_PIG_JOB} when script lookup or job submission raises an
 *             {@link IOException}; any other throwable raised there is wrapped in a
 *             {@link RuntimeException}
 * @see ml.shifu.common.Step#process()
 */
@Override
public List<ColumnConfig> process() throws IOException {
    LOG.info("Step Start: norm");
    long start = System.currentTimeMillis();

    LOG.info("Saving ModelConfig, ColumnConfig and then upload to HDFS ...");
    JSONUtils.writeValue(new File(pathFinder.getModelConfigPath(SourceType.LOCAL)), modelConfig);
    JSONUtils.writeValue(new File(pathFinder.getColumnConfigPath(SourceType.LOCAL)), columnConfigList);

    SourceType sourceType = modelConfig.getDataSet().getSource();
    if (SourceType.HDFS.equals(sourceType)) {
        CommonUtils.copyConfFromLocalToHDFS(modelConfig, this.pathFinder);
    }

    // Remove output of earlier runs before the Pig jobs write new results.
    ShifuFileUtils.deleteFile(pathFinder.getNormalizedDataPath(), sourceType);
    ShifuFileUtils.deleteFile(pathFinder.getNormalizedValidationDataPath(), sourceType);
    ShifuFileUtils.deleteFile(pathFinder.getSelectedRawDataPath(), sourceType);

    Map<String, String> paramsMap = new HashMap<String, String>();
    paramsMap.put("sampleRate", modelConfig.getNormalizeSampleRate().toString());
    paramsMap.put("sampleNegOnly", Boolean.toString(modelConfig.isNormalizeSampleNegOnly()));
    paramsMap.put("delimiter", CommonUtils.escapePigString(modelConfig.getDataSetDelimiter()));

    try {
        String normPigPath;
        if (modelConfig.getNormalize().getIsParquet()) {
            if (modelConfig.getBasic().getPostTrainOn()) {
                normPigPath = pathFinder.getScriptPath("scripts/NormalizeWithParquetAndPostTrain.pig");
            } else {
                LOG.info("Post train is disabled by 'postTrainOn=false'.");
                normPigPath = pathFinder.getScriptPath("scripts/NormalizeWithParquet.pig");
            }
        } else {
            // No matter whether post train is enabled or not, only norm results are stored here
            // since the new post train solution, so a single script covers both cases.
            normPigPath = pathFinder.getScriptPath("scripts/Normalize.pig");
        }

        // Training data: compressed output, not a "norm for clean" run.
        paramsMap.put(Constants.IS_COMPRESS, "true");
        paramsMap.put(Constants.IS_NORM_FOR_CLEAN, "false");
        PigExecutor.getExecutor().submitJob(modelConfig, normPigPath, paramsMap, sourceType, super.pathFinder);

        if (StringUtils.isNotBlank(modelConfig.getValidationDataSetRawPath())) {
            // Validation data: same script, but with input/output paths overridden and
            // compression switched off.
            paramsMap.put(Constants.IS_COMPRESS, "false");
            paramsMap.put(Constants.PATH_RAW_DATA, modelConfig.getValidationDataSetRawPath());
            paramsMap.put(Constants.PATH_NORMALIZED_DATA, pathFinder.getNormalizedValidationDataPath());
            PigExecutor.getExecutor().submitJob(modelConfig, normPigPath, paramsMap, sourceType, super.pathFinder);
        }
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, e);
    } catch (Throwable e) {
        // Kept from the original contract: everything else surfaces as a RuntimeException.
        throw new RuntimeException(e);
    }

    LOG.info("Step Finished: norm with {} ms", (System.currentTimeMillis() - start));
    return columnConfigList;
}
use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
the class Step method checkAlgParameter.
/**
 * Checks that the training parameters of {@code modelConfig} fit its configured algorithm and
 * installs a default parameter set when they do not.
 *
 * <p>For each supported algorithm one marker key is looked up ({@code LearningRate} for LR,
 * {@code Propagation} for NN, {@code Kernel} for SVM, {@code FeatureSubsetStrategy} for RF and
 * GBT). When the marker is absent the <em>whole</em> parameter map is replaced by the defaults
 * for that algorithm; existing entries are not carried over. DT needs no parameters and is left
 * untouched. Algorithm names are matched case-insensitively.
 *
 * <p>A {@code null} parameter map is treated like an empty one, and a {@code null} algorithm is
 * reported as unsupported instead of failing with a {@link NullPointerException}.
 *
 * @param modelConfig
 *            the model configuration to inspect and, if needed, update via {@code setParams}
 * @throws ShifuException
 *             with {@code ERROR_UNSUPPORT_ALG} when the algorithm is missing or not one of
 *             LR, NN, SVM, DT, RF, GBT
 */
private void checkAlgParameter(ModelConfig modelConfig) {
    String alg = modelConfig.getAlgorithm();
    Map<String, Object> param = modelConfig.getParams();
    LOG.info("Check algorithm parameter");

    if (alg == null) {
        throw new ShifuException(ShifuErrorCode.ERROR_UNSUPPORT_ALG);
    }
    if (param == null) {
        // Local stand-in only, so the marker lookups below cannot dereference null.
        param = new LinkedHashMap<String, Object>();
    }

    if (alg.equalsIgnoreCase("LR")) {
        if (!param.containsKey("LearningRate")) {
            param = new LinkedHashMap<String, Object>();
            param.put("LearningRate", 0.1);
            modelConfig.setParams(param);
        }
    } else if (alg.equalsIgnoreCase("NN")) {
        if (!param.containsKey("Propagation")) {
            List<Integer> nodes = new ArrayList<Integer>();
            nodes.add(20);
            nodes.add(10);
            List<String> func = new ArrayList<String>();
            func.add("tanh");
            func.add("tanh");

            param = new LinkedHashMap<String, Object>();
            param.put("Propagation", "Q");
            param.put("LearningRate", 0.1);
            param.put("NumHiddenLayers", 2);
            param.put("NumHiddenNodes", nodes);
            param.put("ActivationFunc", func);
            modelConfig.setParams(param);
        }
    } else if (alg.equalsIgnoreCase("SVM")) {
        if (!param.containsKey("Kernel")) {
            param = new LinkedHashMap<String, Object>();
            param.put("Kernel", "linear");
            param.put("Gamma", 1.);
            param.put("Const", 1.);
            modelConfig.setParams(param);
        }
    } else if (alg.equalsIgnoreCase("DT")) {
        // do nothing
    } else if (alg.equalsIgnoreCase("RF") || alg.equalsIgnoreCase("GBT")) {
        // RF and GBT share the same tree defaults; GBT additionally gets a loss function.
        if (!param.containsKey("FeatureSubsetStrategy")) {
            param = new LinkedHashMap<String, Object>();
            param.put("FeatureSubsetStrategy", "all");
            param.put("MaxDepth", 10);
            param.put("MaxStatsMemoryMB", 256);
            param.put("Impurity", "entropy");
            if (alg.equalsIgnoreCase("GBT")) {
                param.put("Loss", "squared");
            }
            modelConfig.setParams(param);
        }
    } else {
        throw new ShifuException(ShifuErrorCode.ERROR_UNSUPPORT_ALG);
    }
}
use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
the class DataMerger method doMerge.
/**
 * Merges the data according to the configured run mode.
 *
 * <p>In LOCAL mode the output header is generated and the data is merged in-process; in MAPRED
 * or DIST mode the merge is delegated to {@code runMapReduceToMerge()}.
 *
 * @return always {@code true} when the method returns normally
 * @throws IOException
 *             declared for the merge routines this method calls
 * @throws ShifuException
 *             with {@code ERROR_UNSUPPORT_MODE} when the run mode is none of LOCAL, MAPRED, DIST
 */
public boolean doMerge() throws IOException {
    if (ModelBasicConf.RunMode.LOCAL.equals(runMode)) {
        // local merge: header first, then the rows
        genOutputHeader();
        mergeData();
        return true;
    }

    boolean distributed = ModelBasicConf.RunMode.MAPRED.equals(runMode)
            || ModelBasicConf.RunMode.DIST.equals(runMode);
    if (!distributed) {
        throw new ShifuException(ShifuErrorCode.ERROR_UNSUPPORT_MODE);
    }

    // cluster merge
    runMapReduceToMerge();
    return true;
}
use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
the class GridSearch method convertItemValue.
/**
 * Converts the textual value of a grid-search train parameter into the Java type declared for
 * that parameter in the meta warehouse.
 *
 * <p>Supported meta types and their results:
 * <ul>
 * <li>{@code text} - the trimmed string itself</li>
 * <li>{@code integer} / {@code int} - {@link Integer}</li>
 * <li>{@code number} - {@link Double}</li>
 * <li>{@code float} - {@link Float}</li>
 * <li>{@code boolean} - {@code true} only for the (case-insensitive) string "true"</li>
 * <li>{@code list} - only for {@code NumHiddenNodes} (bracketed integers, result
 * {@code List<Integer>}) and {@code ActivationFunc} (bracketed names, result
 * {@code List<String>})</li>
 * </ul>
 *
 * @param metaWarehouse
 *            meta definitions keyed by the meta key derived from {@code itemKey}
 * @param itemKey
 *            name of the train parameter
 * @param itemValueStr
 *            raw value text; surrounding whitespace is ignored
 * @return the converted value
 * @throws ShifuException
 *             with {@code ERROR_GRID_SEARCH_FILE_CONFIG} when the parameter is unknown, the value
 *             cannot be parsed as the declared type, or the type/value combination is not
 *             supported
 */
private Object convertItemValue(Map<String, MetaItem> metaWarehouse, String itemKey, String itemValueStr) throws ShifuException {
    MetaItem itemMeta = metaWarehouse.get(getItemKeyInMeta(itemKey));
    if (itemMeta == null) {
        throw new ShifuException(ShifuErrorCode.ERROR_GRID_SEARCH_FILE_CONFIG, "Train param name not recognized: " + itemKey);
    }

    itemValueStr = itemValueStr.trim();
    // Constant-first comparisons: a meta item without a type ends in the "not recognized" error
    // at the bottom instead of a NullPointerException.
    String type = itemMeta.getType();

    if ("text".equals(type)) {
        return itemValueStr;
    }
    if ("integer".equals(type) || "int".equals(type)) {
        try {
            return Integer.parseInt(itemValueStr);
        } catch (NumberFormatException e) {
            throw invalidTrainParamValue(itemKey, "integer", itemValueStr, e);
        }
    }
    if ("number".equals(type)) {
        try {
            return Double.parseDouble(itemValueStr);
        } catch (NumberFormatException e) {
            throw invalidTrainParamValue(itemKey, "number", itemValueStr, e);
        }
    }
    if ("float".equals(type)) {
        try {
            // Diagnostic output goes through the logger rather than stdout.
            LOG.debug("create float value for " + itemValueStr);
            return Float.parseFloat(itemValueStr);
        } catch (NumberFormatException e) {
            throw invalidTrainParamValue(itemKey, "number", itemValueStr, e);
        }
    }
    if ("boolean".equals(type)) {
        return itemValueStr.equalsIgnoreCase("true");
    }
    if ("list".equals(type)) {
        String elementType = itemMeta.getElementType();
        // Both patterns require at least one character between the brackets.
        if ("NumHiddenNodes".equals(itemKey) && "number".equals(elementType)
                && itemValueStr.matches("\\[[0-9\\+ ,]+\\]")) {
            String inner = itemValueStr.substring(1, itemValueStr.length() - 1).trim();
            List<Integer> itemValue = new ArrayList<Integer>();
            try {
                for (String valueSplit : inner.split(",")) {
                    itemValue.add(Integer.parseInt(valueSplit.trim()));
                }
            } catch (NumberFormatException e) {
                throw invalidTrainParamValue(itemKey, "integer", inner, e);
            }
            return itemValue;
        }
        if ("ActivationFunc".equals(itemKey) && "text".equals(elementType)
                && itemValueStr.matches("\\[[a-zA-Z0-9 ,]+\\]")) {
            String inner = itemValueStr.substring(1, itemValueStr.length() - 1).trim();
            List<String> itemValue = new ArrayList<String>();
            for (String valueSplit : inner.split(",")) {
                itemValue.add(valueSplit.trim());
            }
            return itemValue;
        }
    }

    throw new ShifuException(ShifuErrorCode.ERROR_GRID_SEARCH_FILE_CONFIG, "Train param and value not recognized: " + itemKey + ":" + itemValueStr);
}

/**
 * Logs and builds the exception for a train parameter value that cannot be parsed as its
 * declared type. The caller is expected to throw the returned exception.
 */
private ShifuException invalidTrainParamValue(String itemKey, String expectedType, String actualValue, NumberFormatException cause) {
    String message = String.format("Train param %s should be %s type, actual value got is %s", itemKey, expectedType, actualValue);
    LOG.error(message);
    return new ShifuException(ShifuErrorCode.ERROR_GRID_SEARCH_FILE_CONFIG, cause, message);
}
use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.
the class DataFilterUDF method exec.
/**
 * Filters / samples one input record.
 *
 * <p>The record's fields are handed to {@code DataSampler.filter} together with the tag column
 * index, positive/negative tags and sampling settings of this UDF. The fields it returns are
 * copied, in order, into a new tuple.
 *
 * @param input
 *            the record to filter; must have at least as many fields as there are column configs
 * @return a new tuple holding the filtered fields, or {@code null} when the sampler returns
 *         {@code null} for this record
 * @throws IOException
 *             declared by the UDF signature
 * @throws ShifuException
 *             with {@code ERROR_NO_EQUAL_COLCONFIG} when the record has fewer fields than there
 *             are column configs
 */
public Tuple exec(Tuple input) throws IOException {
    final int fieldCount = input.size();
    final int configuredColumns = this.columnConfigList.size();
    if (fieldCount < configuredColumns) {
        throw new ShifuException(ShifuErrorCode.ERROR_NO_EQUAL_COLCONFIG);
    }

    final List<Object> kept = DataSampler.filter(tagColumnNum, posTags, negTags, input.getAll(), sampleRate,
            sampleNegOnly);
    if (kept == null) {
        return null;
    }

    // Build the output only once we know the record survives filtering.
    final Tuple result = TupleFactory.getInstance().newTuple();
    for (int i = 0; i < kept.size(); i++) {
        result.append(kept.get(i));
    }
    return result;
}
Aggregations