Search in sources :

Example 1 with SourceType

use of ml.shifu.shifu.container.obj.RawSourceData.SourceType in project shifu by ShifuML.

the class NormStep method process.

/**
 * Runs the normalization step. See {@code ml.shifu.common.Step#process()}.
 * <p>
 * The method saves the current model and column configuration to their local paths (copying them to HDFS
 * when the data set source is HDFS), deletes the previous normalized, normalized-validation and
 * selected-raw outputs, and then submits the normalization Pig script. When a validation data set path is
 * configured, the same script is submitted a second time for that data set.
 *
 * @return the column configuration list held by this step
 * @throws IOException propagated from the configuration save / cleanup calls made before the Pig job is
 *         submitted
 * @throws ShifuException with {@code ERROR_RUNNING_PIG_JOB} when an {@link IOException} is raised while
 *         selecting or submitting a Pig job
 */
@Override
public List<ColumnConfig> process() throws IOException {
    LOG.info("Step Start: norm");
    long start = System.currentTimeMillis();

    LOG.info("Saving ModelConfig, ColumnConfig and then upload to HDFS ...");
    JSONUtils.writeValue(new File(pathFinder.getModelConfigPath(SourceType.LOCAL)), modelConfig);
    JSONUtils.writeValue(new File(pathFinder.getColumnConfigPath(SourceType.LOCAL)), columnConfigList);

    // The data set source drives the HDFS copy, the cleanup and the Pig submission, so look it up once.
    SourceType sourceType = modelConfig.getDataSet().getSource();
    if (SourceType.HDFS.equals(sourceType)) {
        CommonUtils.copyConfFromLocalToHDFS(modelConfig, this.pathFinder);
    }

    // Remove the outputs of any previous run before the job writes new ones.
    ShifuFileUtils.deleteFile(pathFinder.getNormalizedDataPath(), sourceType);
    ShifuFileUtils.deleteFile(pathFinder.getNormalizedValidationDataPath(), sourceType);
    ShifuFileUtils.deleteFile(pathFinder.getSelectedRawDataPath(), sourceType);

    Map<String, String> paramsMap = new HashMap<String, String>();
    paramsMap.put("sampleRate", modelConfig.getNormalizeSampleRate().toString());
    paramsMap.put("sampleNegOnly", Boolean.toString(modelConfig.isNormalizeSampleNegOnly()));
    paramsMap.put("delimiter", CommonUtils.escapePigString(modelConfig.getDataSetDelimiter()));

    try {
        String normPigPath;
        if (modelConfig.getNormalize().getIsParquet()) {
            if (modelConfig.getBasic().getPostTrainOn()) {
                normPigPath = pathFinder.getScriptPath("scripts/NormalizeWithParquetAndPostTrain.pig");
            } else {
                LOG.info("Post train is disabled by 'postTrainOn=false'.");
                normPigPath = pathFinder.getScriptPath("scripts/NormalizeWithParquet.pig");
            }
        } else {
            // No matter whether post train is enabled or not, only norm results will be stored since the
            // new post train solution, so the same script is used in both cases.
            normPigPath = pathFinder.getScriptPath("scripts/Normalize.pig");
        }

        paramsMap.put(Constants.IS_COMPRESS, "true");
        paramsMap.put(Constants.IS_NORM_FOR_CLEAN, "false");
        PigExecutor.getExecutor().submitJob(modelConfig, normPigPath, paramsMap, sourceType, super.pathFinder);

        if (StringUtils.isNotBlank(modelConfig.getValidationDataSetRawPath())) {
            // Second run over the validation data: same script and parameters, with compression turned
            // off and the raw / normalized paths overridden.
            paramsMap.put(Constants.IS_COMPRESS, "false");
            paramsMap.put(Constants.PATH_RAW_DATA, modelConfig.getValidationDataSetRawPath());
            paramsMap.put(Constants.PATH_NORMALIZED_DATA, pathFinder.getNormalizedValidationDataPath());
            PigExecutor.getExecutor().submitJob(modelConfig, normPigPath, paramsMap, sourceType,
                    super.pathFinder);
        }
    } catch (IOException e) {
        throw new ShifuException(ShifuErrorCode.ERROR_RUNNING_PIG_JOB, e);
    } catch (Throwable e) {
        // Anything else is rethrown unchecked with the original cause attached.
        throw new RuntimeException(e);
    }

    LOG.info("Step Finished: norm with {} ms", (System.currentTimeMillis() - start));
    return columnConfigList;
}
Also used : HashMap(java.util.HashMap) SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) IOException(java.io.IOException) File(java.io.File) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 2 with SourceType

use of ml.shifu.shifu.container.obj.RawSourceData.SourceType in project shifu by ShifuML.

the class SouceTypeDeserializer method deserialize.

/**
 * Reads the current JSON value as a tree node and maps its text to a {@link SourceType} by comparing it,
 * ignoring case, against each constant's name.
 *
 * @param jp parser positioned on the value to read
 * @param context deserialization context (not used by this method)
 * @return the first constant whose name matches the node text ignoring case, or {@code null} when none does
 * @throws IOException if reading the tree from the parser fails
 */
@Override
public SourceType deserialize(JsonParser jp, DeserializationContext context) throws IOException {
    JsonNode tree = jp.getCodec().readTree(jp);
    String text = tree.textValue();

    SourceType matched = null;
    for (SourceType candidate : SourceType.values()) {
        if (candidate.name().equalsIgnoreCase(text)) {
            matched = candidate;
            break;
        }
    }
    return matched;
}
Also used : SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) JsonNode(com.fasterxml.jackson.databind.JsonNode) ObjectCodec(com.fasterxml.jackson.core.ObjectCodec)

Aggregations

SourceType (ml.shifu.shifu.container.obj.RawSourceData.SourceType): 2, ObjectCodec (com.fasterxml.jackson.core.ObjectCodec): 1, JsonNode (com.fasterxml.jackson.databind.JsonNode): 1, File (java.io.File): 1, IOException (java.io.IOException): 1, HashMap (java.util.HashMap): 1, ShifuException (ml.shifu.shifu.exception.ShifuException): 1