Search in sources :

Example 26 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class Step method validateModelConfig.

/**
 * Checks that the model config is well written for the given step.
 *
 * A missing (null) config is recorded as a failed validation; otherwise the config is probed
 * through the ModelInspector. When validation fails, every cause is logged and a ShifuException
 * is thrown. When it passes, the algorithm parameters are checked via checkAlgParameter.
 *
 * @param modelConfig
 *            the model config
 * @param step
 *            step in Shifu
 * @throws Exception
 *             any exception in validation
 */
protected void validateModelConfig(ModelConfig modelConfig, ModelStep step) throws Exception {
    ValidateResult validation;
    if (modelConfig != null) {
        validation = ModelInspector.getInspector().probe(modelConfig, step);
    } else {
        // nothing to probe: build a failed result that carries the reason
        validation = new ValidateResult(false);
        validation.getCauses().add("The ModelConfig is not loaded!");
    }

    if (!validation.getStatus()) {
        LOG.error("ModelConfig Validation - Fail! See below:");
        for (String reason : validation.getCauses()) {
            LOG.error("\t!!! " + reason);
        }
        throw new ShifuException(ShifuErrorCode.ERROR_MODELCONFIG_NOT_VALIDATION);
    }

    LOG.info("ModelConfig Validation - OK");
    checkAlgParameter(modelConfig);
}
Also used : ValidateResult(ml.shifu.shifu.container.meta.ValidateResult) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 27 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class ModelDataEncodeProcessor method encodeModelData.

/**
 * Runs the EncodeData pig script for the training data (when evalConfig is null) or for the
 * given eval set, then inspects the Shifu counters of the pig job.
 *
 * @param evalConfig
 *            the eval set to encode, or null to encode the training data set
 * @return 1 when invalid tag records are at least 80% of the valid records, 0 otherwise
 * @throws IOException
 *             declared for callers; an IOException raised while running the pig job is rethrown
 *             as a ShifuException
 */
@SuppressWarnings("deprecation")
private int encodeModelData(EvalConfig evalConfig) throws IOException {
    int status = 0;
    RawSourceData.SourceType sourceType = this.modelConfig.getDataSet().getSource();

    // clean up output directories
    ShifuFileUtils.deleteFile(pathFinder.getEncodeDataPath(evalConfig), sourceType);

    // prepare special parameters and execute pig
    Map<String, String> pigParams = new HashMap<String, String>();
    pigParams.put(Constants.SOURCE_TYPE, sourceType.toString());
    pigParams.put("pathRawData", (evalConfig == null) ? modelConfig.getDataSetRawPath() : evalConfig.getDataSet().getDataPath());
    pigParams.put("pathEncodeData", pathFinder.getEncodeDataPath(evalConfig));
    pigParams.put("delimiter", CommonUtils.escapePigString(modelConfig.getDataSetDelimiter()));
    pigParams.put("evalSetName", (evalConfig == null ? TRAINING_DATA_SET : evalConfig.getName()));
    pigParams.put(Constants.IS_COMPRESS, "true");

    try {
        String scriptPath = pathFinder.getScriptPath("scripts/EncodeData.pig");
        PigExecutor.getExecutor().submitJob(modelConfig, scriptPath, pigParams);

        for (Iterator<JobStats> jobs = PigStats.get().getJobGraph().iterator(); jobs.hasNext();) {
            JobStats job = jobs.next();
            boolean hasShifuCounters = job.getHadoopCounters() != null
                    && job.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER) != null;
            if (!hasShifuCounters) {
                // a job without the Shifu counter group ends the scan
                break;
            }

            long validCount = job.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter("TOTAL_VALID_COUNT");
            if (validCount == 0L) {
                // If no basic record counter, check next one
                continue;
            }

            long invalidTags = job.getHadoopCounters().getGroup(Constants.SHIFU_GROUP_COUNTER).getCounter("INVALID_TAG");
            LOG.info("Total valid records {} after filtering, invalid tag records {}.", validCount, invalidTags);
            if (validCount > 0L && invalidTags * 1d / validCount >= 0.8d) {
                LOG.error("Too many invalid tags, please check you configuration on positive tags and negative tags.");
                status = 1;
            }

            // only one pig job with such counters, break
            break;
        }
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, e);
    } catch (Throwable e) {
        throw new RuntimeException(e);
    }
    return status;
}
Also used : SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) IOException(java.io.IOException) RawSourceData(ml.shifu.shifu.container.obj.RawSourceData) JobStats(org.apache.pig.tools.pigstats.JobStats) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 28 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class BasicModelProcessor method runDataClean.

/**
 * Generates the cleaned data for tree models by running the Normalize pig script in
 * "norm for clean" mode, and optionally shuffles the cleaned output.
 *
 * Any existing cleaned data path is deleted first. When a validation data set raw path is
 * configured, the same script is submitted a second time to produce the cleaned validation data.
 *
 * @param isToShuffle
 *            whether to run a MapReduceShuffle over the cleaned data afterwards
 * @throws IOException
 *             declared for callers; an IOException raised while running the pig job is rethrown
 *             as a ShifuException
 * @throws RuntimeException
 *             if the pig job fails with a non-IO error or the shuffle fails; when the shuffle is
 *             interrupted, the thread's interrupt status is restored before throwing
 */
protected void runDataClean(boolean isToShuffle) throws IOException {
    SourceType sourceType = modelConfig.getDataSet().getSource();
    String cleanedDataPath = this.pathFinder.getCleanedDataPath();
    LOG.info("Start to generate clean data for tree model ... ");
    // remove stale output so the pig job can write to a fresh location
    if (ShifuFileUtils.isFileExists(cleanedDataPath, sourceType)) {
        ShifuFileUtils.deleteFile(cleanedDataPath, sourceType);
    }
    Map<String, String> paramsMap = new HashMap<String, String>();
    paramsMap.put("sampleRate", modelConfig.getNormalizeSampleRate().toString());
    paramsMap.put("sampleNegOnly", ((Boolean) modelConfig.isNormalizeSampleNegOnly()).toString());
    paramsMap.put("delimiter", CommonUtils.escapePigString(modelConfig.getDataSetDelimiter()));
    paramsMap.put("is_csv", String.valueOf(Boolean.TRUE.toString().equalsIgnoreCase(Environment.getProperty(Constants.SHIFU_OUTPUT_DATA_CSV, Boolean.FALSE.toString()))));
    try {
        String normPigPath = pathFinder.getScriptPath("scripts/Normalize.pig");
        paramsMap.put(Constants.IS_COMPRESS, "true");
        paramsMap.put(Constants.IS_NORM_FOR_CLEAN, "true");
        paramsMap.put(Constants.PATH_NORMALIZED_DATA, pathFinder.getCleanedDataPath());
        PigExecutor.getExecutor().submitJob(modelConfig, normPigPath, paramsMap, sourceType, this.pathFinder);
        // cleaned validation data
        if (StringUtils.isNotBlank(modelConfig.getValidationDataSetRawPath())) {
            String cleanedValidationDataPath = pathFinder.getCleanedValidationDataPath();
            if (ShifuFileUtils.isFileExists(cleanedValidationDataPath, sourceType)) {
                ShifuFileUtils.deleteFile(cleanedValidationDataPath, sourceType);
            }
            // the same parameter map is reused; only the entries below differ for validation data
            paramsMap.put(Constants.IS_COMPRESS, "false");
            paramsMap.put(Constants.PATH_RAW_DATA, modelConfig.getValidationDataSetRawPath());
            paramsMap.put(Constants.PATH_NORMALIZED_DATA, pathFinder.getCleanedValidationDataPath());
            PigExecutor.getExecutor().submitJob(modelConfig, normPigPath, paramsMap, sourceType, this.pathFinder);
        }
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, e);
    } catch (Throwable e) {
        throw new RuntimeException(e);
    }
    if (isToShuffle) {
        MapReduceShuffle shuffler = new MapReduceShuffle(this.modelConfig);
        try {
            shuffler.run(pathFinder.getCleanedDataPath());
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Fail to shuffle the cleaned data.", e);
        } catch (InterruptedException e) {
            // restore the interrupt status so code further up the stack can still observe it
            Thread.currentThread().interrupt();
            throw new RuntimeException("Fail to shuffle the cleaned data.", e);
        }
    }
    LOG.info("Generate clean data for tree model successful.");
}
Also used : SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) MapReduceShuffle(ml.shifu.shifu.core.shuffle.MapReduceShuffle) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 29 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class BasicModelProcessor method copyModelFiles.

/**
 * Loads ModelConfig.json from the source folder (local file system), renames the model set after
 * the last path element of the target folder, sets the creator from the SYSTEM_USER environment
 * property, and writes the result as ModelConfig.json under the target folder.
 *
 * @param sourcePath
 *            folder containing the ModelConfig.json to copy
 * @param targetPath
 *            folder that receives the rewritten ModelConfig.json
 * @throws IOException
 *             declared for callers; an IOException raised while writing the config is rethrown
 *             as a ShifuException
 */
public void copyModelFiles(String sourcePath, String targetPath) throws IOException {
    String sourceConfigPath = sourcePath + File.separator + "ModelConfig.json";
    loadModelConfig(sourceConfigPath, SourceType.LOCAL);

    String modelSetName = new File(targetPath).getName();
    this.modelConfig.setModelSetName(modelSetName);

    String creator = Environment.getProperty(Environment.SYSTEM_USER);
    this.modelConfig.setModelSetCreator(creator);

    File targetConfigFile = new File(targetPath + File.separator + "ModelConfig.json");
    try {
        JSONUtils.writeValue(targetConfigFile, modelConfig);
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_WRITE_MODELCONFIG, e);
    }
}
Also used : SourceFile(ml.shifu.shifu.fs.SourceFile) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 30 with ShifuException

use of ml.shifu.shifu.exception.ShifuException in project shifu by ShifuML.

the class CreateModelProcessor method run.

/**
 * Creates a new model set folder named after {@code name} and writes the initial
 * ModelConfig.json into it.
 *
 * @return 1 if the folder already exists, -1 if any step of the creation fails, 0 on success
 * @throws IOException
 *             - when creating files
 */
@Override
public int run() throws IOException {
    File modelSetFolder = new File(name);
    if (modelSetFolder.exists()) {
        log.error("ModelSet - {} already exists.", name);
        return 1;
    }

    try {
        log.info("Creating ModelSet Folder: " + modelSetFolder.getCanonicalPath() + "...");
        FileUtils.forceMkdir(modelSetFolder);

        log.info("Creating Initial ModelConfig.json ...");
        // how to check hdfs
        boolean enableHadoop = HDFSUtils.isDistributedMode();
        log.info(enableHadoop
                ? "Enable DIST/MAPRED mode because Hadoop cluster is detected."
                : "Enable LOCAL mode because Hadoop cluster is not detected.");

        modelConfig = ModelConfig.createInitModelConfig(name, alg, description, enableHadoop);

        File configFile = new File(modelSetFolder.getCanonicalPath() + File.separator + "ModelConfig.json");
        JSONUtils.writeValue(configFile, modelConfig);

        createHead(modelSetFolder.getCanonicalPath());
        log.info("Step Finished: new");
        return 0;
    } catch (ShifuException e) {
        // Shifu failures carry an error code worth surfacing next to the message
        log.error("Error:" + e.getError().toString() + "; msg:" + e.getMessage(), e);
        return -1;
    } catch (Exception e) {
        log.error("Error:" + e.getMessage(), e);
        return -1;
    }
}
Also used : File(java.io.File) ShifuException(ml.shifu.shifu.exception.ShifuException) ShifuException(ml.shifu.shifu.exception.ShifuException) IOException(java.io.IOException)

Aggregations

ShifuException (ml.shifu.shifu.exception.ShifuException): 39; IOException (java.io.IOException): 22; SourceType (ml.shifu.shifu.container.obj.RawSourceData.SourceType): 12; HashMap (java.util.HashMap): 8; ArrayList (java.util.ArrayList): 5; ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig): 5; File (java.io.File): 4; Scanner (java.util.Scanner): 4; Path (org.apache.hadoop.fs.Path): 4; SourceFile (ml.shifu.shifu.fs.SourceFile): 3; JobStats (org.apache.pig.tools.pigstats.JobStats): 3; BufferedReader (java.io.BufferedReader): 2; ConfusionMatrixObject (ml.shifu.shifu.container.ConfusionMatrixObject): 2; EvalConfig (ml.shifu.shifu.container.obj.EvalConfig): 2; RawSourceData (ml.shifu.shifu.container.obj.RawSourceData): 2; AbstractStatsExecutor (ml.shifu.shifu.core.processor.stats.AbstractStatsExecutor): 2; AkkaStatsWorker (ml.shifu.shifu.core.processor.stats.AkkaStatsWorker): 2; DIBStatsExecutor (ml.shifu.shifu.core.processor.stats.DIBStatsExecutor): 2; MunroPatIStatsExecutor (ml.shifu.shifu.core.processor.stats.MunroPatIStatsExecutor): 2; MunroPatStatsExecutor (ml.shifu.shifu.core.processor.stats.MunroPatStatsExecutor): 2