Search in sources :

Example 16 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class NormStep method process.

/**
 * Runs the normalization step.
 *
 * <p>The method persists the current model and column configuration locally (and copies the
 * configuration to HDFS when the data set source is HDFS), removes the previous normalized,
 * normalized-validation and selected-raw outputs, and then submits the normalization Pig script.
 * When a validation data set path is configured, the same script is submitted a second time
 * against the validation data.
 *
 * @return the column configuration list held by this step
 * @throws IOException if saving the configuration or cleaning the old outputs fails
 * @throws ShifuException if submitting a Pig job fails with an {@link IOException}
 * @see ml.shifu.common.Step#process()
 */
@Override
public List<ColumnConfig> process() throws IOException {
    LOG.info("Step Start: norm");
    long start = System.currentTimeMillis();

    LOG.info("Saving ModelConfig, ColumnConfig and then upload to HDFS ...");
    JSONUtils.writeValue(new File(pathFinder.getModelConfigPath(SourceType.LOCAL)), modelConfig);
    JSONUtils.writeValue(new File(pathFinder.getColumnConfigPath(SourceType.LOCAL)), columnConfigList);

    SourceType sourceType = modelConfig.getDataSet().getSource();
    if (SourceType.HDFS.equals(sourceType)) {
        CommonUtils.copyConfFromLocalToHDFS(modelConfig, pathFinder);
    }

    // Remove the outputs of any previous run before the Pig jobs write new ones.
    ShifuFileUtils.deleteFile(pathFinder.getNormalizedDataPath(), sourceType);
    ShifuFileUtils.deleteFile(pathFinder.getNormalizedValidationDataPath(), sourceType);
    ShifuFileUtils.deleteFile(pathFinder.getSelectedRawDataPath(), sourceType);

    Map<String, String> paramsMap = new HashMap<String, String>();
    paramsMap.put("sampleRate", modelConfig.getNormalizeSampleRate().toString());
    paramsMap.put("sampleNegOnly", Boolean.toString(modelConfig.isNormalizeSampleNegOnly()));
    paramsMap.put("delimiter", CommonUtils.escapePigString(modelConfig.getDataSetDelimiter()));

    try {
        String normPigPath;
        if (modelConfig.getNormalize().getIsParquet()) {
            if (modelConfig.getBasic().getPostTrainOn()) {
                normPigPath = pathFinder.getScriptPath("scripts/NormalizeWithParquetAndPostTrain.pig");
            } else {
                LOG.info("Post train is disabled by 'postTrainOn=false'.");
                normPigPath = pathFinder.getScriptPath("scripts/NormalizeWithParquet.pig");
            }
        } else {
            // No matter whether post train is enabled or not, only norm results are stored
            // since the new post train solution, so a single script covers both cases.
            normPigPath = pathFinder.getScriptPath("scripts/Normalize.pig");
        }

        paramsMap.put(Constants.IS_COMPRESS, "true");
        paramsMap.put(Constants.IS_NORM_FOR_CLEAN, "false");
        PigExecutor.getExecutor().submitJob(modelConfig, normPigPath, paramsMap, sourceType, pathFinder);

        String validationRawPath = modelConfig.getValidationDataSetRawPath();
        if (StringUtils.isNotBlank(validationRawPath)) {
            // Second pass: same script, pointed at the validation data, with compression off.
            paramsMap.put(Constants.IS_COMPRESS, "false");
            paramsMap.put(Constants.PATH_RAW_DATA, validationRawPath);
            paramsMap.put(Constants.PATH_NORMALIZED_DATA, pathFinder.getNormalizedValidationDataPath());
            PigExecutor.getExecutor().submitJob(modelConfig, normPigPath, paramsMap, sourceType, pathFinder);
        }
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, e);
    } catch (Throwable e) {
        throw new RuntimeException(e);
    }

    LOG.info("Step Finished: norm with {} ms", (System.currentTimeMillis() - start));
    return columnConfigList;
}
Also used : HashMap(java.util.HashMap) SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) IOException(java.io.IOException) File(java.io.File) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 17 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class Step method checkAlgParameter.

/**
 * Ensures the model configuration carries the training parameters required by its algorithm.
 *
 * <p>Each supported algorithm (except {@code DT}) has one marker key. When that key is absent
 * from the configured parameters, the parameters are <em>replaced</em> by a fresh map that
 * contains only the defaults for that algorithm; any other entries previously configured are
 * not carried over. When the marker key is present the parameters are left untouched.
 *
 * <p>A {@code null} parameter map is treated as an empty one, and a {@code null} algorithm name
 * is reported as an unsupported algorithm.
 *
 * @param modelConfig the model configuration to check and, if needed, update in place
 * @throws ShifuException with {@code ERROR_UNSUPPORT_ALG} if the algorithm is not one of
 *         LR, NN, SVM, DT, RF or GBT (case-insensitive)
 */
private void checkAlgParameter(ModelConfig modelConfig) {
    String alg = modelConfig.getAlgorithm();
    Map<String, Object> param = modelConfig.getParams();
    LOG.info("Check algorithm parameter");

    if (param == null) {
        // Nothing configured: behave as if every marker key is missing.
        param = new LinkedHashMap<String, Object>();
    }

    // Constant-first comparisons so that a null algorithm reaches the "unsupported" branch.
    if ("LR".equalsIgnoreCase(alg)) {
        if (!param.containsKey("LearningRate")) {
            Map<String, Object> defaults = new LinkedHashMap<String, Object>();
            defaults.put("LearningRate", 0.1);
            modelConfig.setParams(defaults);
        }
    } else if ("NN".equalsIgnoreCase(alg)) {
        if (!param.containsKey("Propagation")) {
            Map<String, Object> defaults = new LinkedHashMap<String, Object>();
            defaults.put("Propagation", "Q");
            defaults.put("LearningRate", 0.1);
            defaults.put("NumHiddenLayers", 2);

            List<Integer> nodes = new ArrayList<Integer>();
            nodes.add(20);
            nodes.add(10);
            defaults.put("NumHiddenNodes", nodes);

            List<String> func = new ArrayList<String>();
            func.add("tanh");
            func.add("tanh");
            defaults.put("ActivationFunc", func);

            modelConfig.setParams(defaults);
        }
    } else if ("SVM".equalsIgnoreCase(alg)) {
        if (!param.containsKey("Kernel")) {
            Map<String, Object> defaults = new LinkedHashMap<String, Object>();
            defaults.put("Kernel", "linear");
            defaults.put("Gamma", 1.);
            defaults.put("Const", 1.);
            modelConfig.setParams(defaults);
        }
    } else if ("DT".equalsIgnoreCase(alg)) {
        // do nothing: no defaults are installed for DT
    } else if ("RF".equalsIgnoreCase(alg) || "GBT".equalsIgnoreCase(alg)) {
        if (!param.containsKey("FeatureSubsetStrategy")) {
            Map<String, Object> defaults = new LinkedHashMap<String, Object>();
            defaults.put("FeatureSubsetStrategy", "all");
            defaults.put("MaxDepth", 10);
            defaults.put("MaxStatsMemoryMB", 256);
            defaults.put("Impurity", "entropy");
            if ("GBT".equalsIgnoreCase(alg)) {
                // GBT shares the RF defaults and adds a loss function.
                defaults.put("Loss", "squared");
            }
            modelConfig.setParams(defaults);
        }
    } else {
        throw new ShifuException(ShifuErrorCode.ERROR_UNSUPPORT_ALG);
    }
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) ShifuException(ml.shifu.shifu.exception.ShifuException) LinkedHashMap(java.util.LinkedHashMap)

Example 18 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class DataMerger method doMerge.

/**
 * Merges the data using the strategy that matches the configured run mode.
 *
 * <p>In {@code LOCAL} mode the output header is generated and the data is merged in-process;
 * in {@code MAPRED} or {@code DIST} mode the merge is delegated to
 * {@code runMapReduceToMerge()}.
 *
 * @return {@code true} whenever the method completes without throwing
 * @throws IOException if the underlying merge reports an I/O failure
 * @throws ShifuException with {@code ERROR_UNSUPPORT_MODE} for any other run mode
 */
public boolean doMerge() throws IOException {
    if (ModelBasicConf.RunMode.LOCAL.equals(runMode)) {
        // local merge: header first, then the data itself
        genOutputHeader();
        mergeData();
        return true;
    }

    boolean distributed = ModelBasicConf.RunMode.MAPRED.equals(runMode)
            || ModelBasicConf.RunMode.DIST.equals(runMode);
    if (!distributed) {
        throw new ShifuException(ShifuErrorCode.ERROR_UNSUPPORT_MODE);
    }

    // distributed merge is handed off to the cluster job
    runMapReduceToMerge();
    return true;
}
Also used : ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 19 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class GridSearch method convertItemValue.

/**
 * Converts the textual value of a train parameter into the Java type declared by its meta item.
 *
 * <p>The meta item is looked up in {@code metaWarehouse} under {@code getItemKeyInMeta(itemKey)}.
 * The value is trimmed and then converted according to the meta type:
 * <ul>
 *   <li>{@code text} - returned as a {@link String}</li>
 *   <li>{@code integer} / {@code int} - parsed as an {@link Integer}</li>
 *   <li>{@code number} - parsed as a {@link Double}</li>
 *   <li>{@code float} - parsed as a {@link Float}</li>
 *   <li>{@code boolean} - {@code true} only for a case-insensitive {@code "true"}</li>
 *   <li>{@code list} - only {@code NumHiddenNodes} (bracketed integers) and
 *       {@code ActivationFunc} (bracketed alphanumeric names) are supported</li>
 * </ul>
 *
 * @param metaWarehouse meta items keyed by meta key
 * @param itemKey name of the train parameter
 * @param itemValueStr raw textual value of the parameter
 * @return the converted value
 * @throws ShifuException with {@code ERROR_GRID_SEARCH_FILE_CONFIG} if the parameter name is
 *         unknown, the value is {@code null}, the value cannot be parsed as the declared type,
 *         or the type/value combination is not recognized
 */
private Object convertItemValue(Map<String, MetaItem> metaWarehouse, String itemKey, String itemValueStr) throws ShifuException {
    MetaItem itemMeta = metaWarehouse.get(getItemKeyInMeta(itemKey));
    if (itemMeta == null) {
        throw new ShifuException(ShifuErrorCode.ERROR_GRID_SEARCH_FILE_CONFIG, "Train param name not recognized: " + itemKey);
    }
    if (itemValueStr == null) {
        // Report a missing value as a configuration error instead of failing on trim().
        throw new ShifuException(ShifuErrorCode.ERROR_GRID_SEARCH_FILE_CONFIG, "Train param and value not recognized: " + itemKey + ":" + itemValueStr);
    }

    String value = itemValueStr.trim();
    String type = itemMeta.getType();

    // Constant-first comparisons: a meta item without a type ends in the final "not recognized" error.
    if ("text".equals(type)) {
        return value;
    } else if ("integer".equals(type) || "int".equals(type)) {
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            String message = String.format("Train param %s should be integer type, actual value got is %s", itemKey, value);
            LOG.error(message);
            throw new ShifuException(ShifuErrorCode.ERROR_GRID_SEARCH_FILE_CONFIG, e, message);
        }
    } else if ("number".equals(type)) {
        try {
            return Double.parseDouble(value);
        } catch (NumberFormatException e) {
            String message = String.format("Train param %s should be number type, actual value got is %s", itemKey, value);
            LOG.error(message);
            throw new ShifuException(ShifuErrorCode.ERROR_GRID_SEARCH_FILE_CONFIG, e, message);
        }
    } else if ("float".equals(type)) {
        try {
            // Diagnostic output goes through the logger rather than standard out.
            LOG.debug("create float value for " + value);
            return Float.parseFloat(value);
        } catch (NumberFormatException e) {
            String message = String.format("Train param %s should be number type, actual value got is %s", itemKey, value);
            LOG.error(message);
            throw new ShifuException(ShifuErrorCode.ERROR_GRID_SEARCH_FILE_CONFIG, e, message);
        }
    } else if ("boolean".equals(type)) {
        return value.equalsIgnoreCase("true");
    } else if ("list".equals(type)) {
        String elementType = itemMeta.getElementType();
        // Both patterns require at least one character between the brackets, so stripping
        // the first and last character below is always safe once a pattern has matched.
        if ("NumHiddenNodes".equals(itemKey) && "number".equals(elementType) && value.matches("\\[[0-9\\+ ,]+\\]")) {
            String inner = value.substring(1, value.length() - 1).trim();
            List<Integer> itemValue = new ArrayList<Integer>();
            try {
                for (String valueSplit : inner.split(",")) {
                    itemValue.add(Integer.parseInt(valueSplit.trim()));
                }
            } catch (NumberFormatException e) {
                String message = String.format("Train param %s should be integer type, actual value got is %s", itemKey, inner);
                LOG.error(message);
                throw new ShifuException(ShifuErrorCode.ERROR_GRID_SEARCH_FILE_CONFIG, e, message);
            }
            return itemValue;
        } else if ("ActivationFunc".equals(itemKey) && "text".equals(elementType) && value.matches("\\[[a-zA-Z0-9 ,]+\\]")) {
            String inner = value.substring(1, value.length() - 1).trim();
            List<String> itemValue = new ArrayList<String>();
            for (String valueSplit : inner.split(",")) {
                itemValue.add(valueSplit.trim());
            }
            return itemValue;
        }
    }
    throw new ShifuException(ShifuErrorCode.ERROR_GRID_SEARCH_FILE_CONFIG, "Train param and value not recognized: " + itemKey + ":" + value);
}
Also used : MetaItem(ml.shifu.shifu.container.meta.MetaItem) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 20 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class DataFilterUDF method exec.

/**
 * Filters one input record through {@code DataSampler.filter} and returns the surviving fields.
 *
 * @param input the record to filter; it must have at least as many fields as there are
 *        column configurations
 * @return a new tuple holding the filtered fields in order, or {@code null} when
 *         {@code DataSampler.filter} returns {@code null} for this record
 * @throws IOException declared by the method signature
 * @throws ShifuException with {@code ERROR_NO_EQUAL_COLCONFIG} if the input has fewer fields
 *         than there are column configurations
 */
public Tuple exec(Tuple input) throws IOException {
    Tuple result = TupleFactory.getInstance().newTuple();

    int fieldCount = input.size();
    int configCount = this.columnConfigList.size();
    if (fieldCount < configCount) {
        throw new ShifuException(ShifuErrorCode.ERROR_NO_EQUAL_COLCONFIG);
    }

    List<Object> kept = DataSampler.filter(tagColumnNum, posTags, negTags, input.getAll(), sampleRate, sampleNegOnly);
    if (kept == null) {
        return null;
    }

    // copy the surviving fields into the output tuple, preserving their order
    Iterator<Object> fields = kept.iterator();
    while (fields.hasNext()) {
        result.append(fields.next());
    }
    return result;
}
Also used : ShifuException(ml.shifu.shifu.exception.ShifuException) Tuple(org.apache.pig.data.Tuple)

Aggregations

ShifuException (ml.shifu.shifu.exception.ShifuException): 39; IOException (java.io.IOException): 22; SourceType (ml.shifu.shifu.container.obj.RawSourceData.SourceType): 12; HashMap (java.util.HashMap): 8; ArrayList (java.util.ArrayList): 5; ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig): 5; File (java.io.File): 4; Scanner (java.util.Scanner): 4; Path (org.apache.hadoop.fs.Path): 4; SourceFile (ml.shifu.shifu.fs.SourceFile): 3; JobStats (org.apache.pig.tools.pigstats.JobStats): 3; BufferedReader (java.io.BufferedReader): 2; ConfusionMatrixObject (ml.shifu.shifu.container.ConfusionMatrixObject): 2; EvalConfig (ml.shifu.shifu.container.obj.EvalConfig): 2; RawSourceData (ml.shifu.shifu.container.obj.RawSourceData): 2; AbstractStatsExecutor (ml.shifu.shifu.core.processor.stats.AbstractStatsExecutor): 2; AkkaStatsWorker (ml.shifu.shifu.core.processor.stats.AkkaStatsWorker): 2; DIBStatsExecutor (ml.shifu.shifu.core.processor.stats.DIBStatsExecutor): 2; MunroPatIStatsExecutor (ml.shifu.shifu.core.processor.stats.MunroPatIStatsExecutor): 2; MunroPatStatsExecutor (ml.shifu.shifu.core.processor.stats.MunroPatStatsExecutor): 2