Search in sources:

Example 1 with NSColumn

use of ml.shifu.shifu.column.NSColumn in project shifu by ShifuML.

the class VarSelectModelProcessor method postProcessFIVarSelect.

/**
 * Flags the finally selected variables once feature importances are available.
 *
 * <p>Force-selected columns are flagged first and counted towards the target. Afterwards the column ids
 * of {@code importances} are walked in the iteration order of its key set, and every column that is
 * neither force-selected nor force-removed (and, when a user candidate list exists, is contained in it)
 * is flagged until the target number from {@code modelConfig.getVarSelectFilterNum()} is reached.
 *
 * @param importances feature importances keyed by column id; only the key order is used here
 *                    (presumably already sorted by importance - verify against the caller)
 * @throws IOException declared for the config/candidate helpers called here
 */
private void postProcessFIVarSelect(Map<Integer, MutablePair<String, Double>> importances) throws IOException {
    int selectCnt = 0;
    for (ColumnConfig config : super.columnConfigList) {
        // enable ForceSelect
        if (config.isForceSelect()) {
            config.setFinalSelect(true);
            selectCnt++;
            log.info("Variable {} is selected, since it is in ForceSelect list.", config.getColumnName());
        }
    }
    // may update the filter number in modelConfig, so it has to run before the target count is read
    VariableSelector.setFilterNumberByFilterOutRatio(this.modelConfig, this.columnConfigList);
    int targetCnt = this.modelConfig.getVarSelectFilterNum();
    List<Integer> candidateColumnIdList = new ArrayList<Integer>(importances.keySet());
    int candidateCount = candidateColumnIdList.size();
    // The former "force-selected variables" loop only re-set finalSelect on columns whose finalSelect
    // flag was already true, i.e. it changed nothing; force-selected columns are handled above.
    Set<NSColumn> userCandidateColumns = CommonUtils.loadCandidateColumns(modelConfig);
    int i = 0;
    // NOTE(review): "i < targetCnt" limits the number of *examined* candidates to targetCnt, so skipped
    // candidates are not replaced by lower-ranked ones - confirm this is intended.
    while (selectCnt < targetCnt && i < targetCnt) {
        if (i >= candidateCount) {
            log.warn("Var select finish due to feature importance count {} is less than target var count {}", candidateCount, targetCnt);
            break;
        }
        Integer columnId = candidateColumnIdList.get(i++);
        // NOTE(review): the column id is used as list position here - confirm ids always match positions.
        ColumnConfig columnConfig = this.columnConfigList.get(columnId);
        if (CollectionUtils.isNotEmpty(userCandidateColumns) && !userCandidateColumns.contains(new NSColumn(columnConfig.getColumnName()))) {
            log.info("Variable {} is not in user's candidate list. Skip it.", columnConfig.getColumnName());
        } else if (!columnConfig.isForceSelect() && !columnConfig.isForceRemove()) {
            columnConfig.setFinalSelect(true);
            selectCnt++;
            log.info("Variable {} is selected.", columnConfig.getColumnName());
        }
    }
    log.info("{} variables are selected.", selectCnt);
}
Also used : ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) NSColumn(ml.shifu.shifu.column.NSColumn)

Example 2 with NSColumn

use of ml.shifu.shifu.column.NSColumn in project shifu by ShifuML.

the class VariableSelector method selectByFilter.

/**
 * Selects variables with the "filter" method and flags them as final-selected.
 *
 * <p>Force-selected columns with non-null mean and stdDev are always taken. When the filter is enabled,
 * the remaining slots (up to {@code modelConfig.getVarSelectFilterNum()}) are filled from the candidate
 * columns according to {@code modelConfig.getVarSelectFilterBy()}: {@code ks}, {@code iv}, {@code mix}
 * (alternating KS and IV rank) or {@code pareto} (pareto order first, then KS rank). An unsupported key
 * stops the selection with a warning instead of looping forever.
 *
 * @return the column config list of this selector, with the finalSelect flags updated
 * @throws IOException declared for the config/candidate helpers called here
 */
public List<ColumnConfig> selectByFilter() throws IOException {
    log.info("    - Method: Filter");
    int ptrKs = 0, ptrIv = 0, ptrPareto = 0, cntByForce = 0;
    VariableSelector.setFilterNumberByFilterOutRatio(this.modelConfig, this.columnConfigList);
    log.info("Start Variable Selection...");
    log.info("\t VarSelectEnabled: " + modelConfig.getVarSelectFilterEnabled());
    log.info("\t VarSelectBy: " + modelConfig.getVarSelectFilterBy());
    log.info("\t VarSelectNum: " + modelConfig.getVarSelectFilterNum());
    List<Integer> selectedColumnNumList = new ArrayList<Integer>();
    List<ColumnConfig> ksList = new ArrayList<ColumnConfig>();
    List<ColumnConfig> ivList = new ArrayList<ColumnConfig>();
    List<Tuple> paretoList = new ArrayList<Tuple>();
    Set<NSColumn> candidateColumns = CommonUtils.loadCandidateColumns(modelConfig);
    boolean hasCandidates = CommonUtils.hasCandidateColumns(columnConfigList);
    int cntSelected = 0;
    // First pass: take force-selected columns and collect the rankable candidates.
    for (ColumnConfig config : this.columnConfigList) {
        if (config == null) {
            continue;
        }
        if (config.isMeta() || config.isTarget()) {
            log.info("\t Skip meta, weight or target column: " + config.getColumnName());
        } else if (config.isForceRemove()) {
            log.info("\t ForceRemove: " + config.getColumnName());
        } else if (config.isForceSelect()) {
            log.info("\t ForceSelect: " + config.getColumnName());
            if (config.getMean() == null || config.getStdDev() == null) {
                // TODO - check the mean of categorical variable could be null
                log.info("\t ForceSelect Failed: mean/stdDev must not be null");
            } else {
                selectedColumnNumList.add(config.getColumnNum());
                cntSelected++;
                cntByForce++;
            }
        } else if (!CommonUtils.isGoodCandidate(config, hasCandidates)) {
            log.info("\t Incomplete info(please check KS, IV, Mean, or StdDev fields): " + config.getColumnName() + " or it is not in candidate list");
        } else if (CollectionUtils.isNotEmpty(candidateColumns) && !candidateColumns.contains(new NSColumn(config.getColumnName()))) {
            log.info("\t Not in candidate list, Skip: " + config.getColumnName());
        } else if ((config.isCategorical() && !modelConfig.isCategoricalDisabled()) || config.isNumerical()) {
            ksList.add(config);
            ivList.add(config);
            // config is known to be non-null here (null entries are skipped at the top of the loop)
            if (config.getColumnStats() != null) {
                Double ks = config.getKs();
                Double iv = config.getIv();
                paretoList.add(new Tuple(config.getColumnNum(), ks == null ? 0d : ks, iv == null ? 0d : iv));
            }
        }
    }
    // not enabled filter, so only select forceSelect columns
    if (!this.modelConfig.getVarSelectFilterEnabled()) {
        log.info("Summary:");
        log.info("\tSelected Variables: " + cntSelected);
        if (cntByForce != 0) {
            log.info("\t- By Force: " + cntByForce);
        }
        for (int n : selectedColumnNumList) {
            // look up by column id, consistent with the final update loop below: the id may not be
            // the position in the list
            ColumnConfig forced = CommonUtils.getColumnConfig(this.columnConfigList, n);
            if (forced != null) {
                forced.setFinalSelect(true);
            }
        }
        return columnConfigList;
    }
    String key = this.modelConfig.getVarSelectFilterBy();
    Collections.sort(ksList, new ColumnConfigComparator("ks"));
    Collections.sort(ivList, new ColumnConfigComparator("iv"));
    List<Tuple> newParetoList = sortByPareto(paretoList);
    int expectedVarNum = Math.min(cntSelected + ksList.size(), modelConfig.getVarSelectFilterNum());
    log.info("Expected selected columns:" + expectedVarNum);
    // reset to false at first.
    resetFinalSelect();
    ColumnConfig config = null;
    while (cntSelected < expectedVarNum) {
        if ("ks".equalsIgnoreCase(key)) {
            config = ksList.get(ptrKs);
            selectedColumnNumList.add(config.getColumnNum());
            ptrKs++;
            log.info("\t SelectedByKS=" + config.getKs() + "(Rank=" + ptrKs + "): " + config.getColumnName());
            cntSelected++;
        } else if ("iv".equalsIgnoreCase(key)) {
            config = ivList.get(ptrIv);
            selectedColumnNumList.add(config.getColumnNum());
            ptrIv++;
            log.info("\t SelectedByIV=" + config.getIv() + "(Rank=" + ptrIv + "): " + config.getColumnName());
            cntSelected++;
        } else if ("mix".equalsIgnoreCase(key)) {
            // alternate: next best by KS, then next best by IV, skipping columns already taken
            config = ksList.get(ptrKs);
            if (selectedColumnNumList.contains(config.getColumnNum())) {
                log.info("\t Variable Already Selected: " + config.getColumnName());
                ptrKs++;
            } else {
                selectedColumnNumList.add(config.getColumnNum());
                ptrKs++;
                log.info("\t SelectedByKS=" + config.getKs() + "(Rank=" + ptrKs + "): " + config.getColumnName());
                cntSelected++;
            }
            if (cntSelected == expectedVarNum) {
                break;
            }
            config = ivList.get(ptrIv);
            if (selectedColumnNumList.contains(config.getColumnNum())) {
                log.info("\t Variable Already Selected: " + config.getColumnName());
                ptrIv++;
            } else {
                selectedColumnNumList.add(config.getColumnNum());
                ptrIv++;
                log.info("\t SelectedByIV=" + config.getIv() + "(Rank=" + ptrIv + "): " + config.getColumnName());
                cntSelected++;
            }
        } else if ("pareto".equalsIgnoreCase(key)) {
            if (ptrPareto >= newParetoList.size()) {
                // pareto list exhausted, fall back to KS order for the remaining slots
                config = ksList.get(ptrKs);
                if (selectedColumnNumList.contains(config.getColumnNum())) {
                    log.info("\t Variable Already Selected: " + config.getColumnName());
                } else {
                    selectedColumnNumList.add(config.getColumnNum());
                    // parenthesized so the two numbers are added instead of concatenated as text
                    log.info("\t SelectedByKS=" + config.getKs() + "(Rank=" + (ptrKs + newParetoList.size()) + "): " + config.getColumnName());
                    cntSelected++;
                }
                ptrKs++;
            } else {
                int columnNum = newParetoList.get(ptrPareto).columnNum;
                selectedColumnNumList.add(columnNum);
                // look up by column id rather than by list position
                ColumnConfig paretoConfig = CommonUtils.getColumnConfig(this.columnConfigList, columnNum);
                log.info("\t SelectedByPareto " + (paretoConfig == null ? String.valueOf(columnNum) : paretoConfig.getColumnName()));
                ptrPareto++;
                cntSelected++;
            }
        } else {
            // No branch would advance cntSelected for an unknown key, so the loop would never end.
            log.warn("\t Unsupported VarSelectFilterBy '" + key + "', no further variables are selected by filter.");
            break;
        }
    }
    log.info("Summary:");
    log.info("\t Selected Variables: " + cntSelected);
    if (cntByForce != 0) {
        log.info("\t - By Force: " + cntByForce);
    }
    if (ptrPareto != 0) {
        log.info("\t - By Pareto: " + ptrPareto);
    }
    if (ptrKs != 0) {
        log.info("\t - By KS: " + ptrKs);
    }
    if (ptrIv != 0) {
        log.info("\t - By IV: " + ptrIv);
    }
    // update column config list and set finalSelect to true
    for (int n : selectedColumnNumList) {
        // get ColumnConfig by column id. The id may not be the position in the list once segments are supported
        ColumnConfig columnConfig = CommonUtils.getColumnConfig(this.columnConfigList, n);
        if (columnConfig != null) {
            columnConfig.setFinalSelect(true);
        }
    }
    return columnConfigList;
}
Also used : ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) ColumnConfigComparator(ml.shifu.shifu.container.obj.ColumnConfig.ColumnConfigComparator) NSColumn(ml.shifu.shifu.column.NSColumn)

Example 3 with NSColumn

use of ml.shifu.shifu.column.NSColumn in project shifu by ShifuML.

the class DataPurifier method isFilter.

/**
 * Evaluates the configured data filter expression against one raw record.
 *
 * <p>Every field is exposed to the expression context twice: under the full header name and under the
 * simple name of the corresponding {@link NSColumn}. A {@code null} field is exposed as an empty string.
 *
 * @param record the raw record line, split with {@code dataDelimiter}
 * @return {@code true} when no filter expression is configured or the expression evaluates to
 *         {@code true}; {@code false} when the field count differs from the header count, when the
 *         expression yields {@code null}/{@code false}, or when evaluation fails in non-strict mode
 * @throws RuntimeException wrapping the evaluation failure when the engine is strict
 * @throws InvalidFilterResultExcetion when the expression yields a non-boolean value
 */
public Boolean isFilter(String record) {
    if (dataFilterExpr == null) {
        return true;
    }
    String[] fields = CommonUtils.split(record, dataDelimiter);
    boolean wellFormed = (fields != null) && (fields.length == headers.length);
    if (!wellFormed) {
        // illegal format data, just skip
        return false;
    }
    jc.clear();
    for (int idx = 0; idx < fields.length; idx++) {
        NSColumn column = new NSColumn(headers[idx]);
        String value = (fields[idx] == null) ? "" : fields[idx];
        jc.set(headers[idx], value);
        jc.set(column.getSimpleName(), value);
    }
    Object evaluated = null;
    try {
        evaluated = dataFilterExpr.evaluate(jc);
    } catch (Throwable e) {
        if (!this.jexl.isStrict()) {
            // non-strict mode: report the failure and treat the record as not matching
            log.error("Error occurred when trying to evaluate " + dataFilterExpr.toString(), e);
        } else {
            throw new RuntimeException(e);
        }
    }
    if (evaluated == null) {
        return Boolean.FALSE;
    }
    if (evaluated instanceof Boolean) {
        return (Boolean) evaluated;
    }
    throw new InvalidFilterResultExcetion("Invalid filter return not boolean type: " + dataFilterExpr.getExpression());
}
Also used : NSColumn(ml.shifu.shifu.column.NSColumn)

Example 4 with NSColumn

use of ml.shifu.shifu.column.NSColumn in project shifu by ShifuML.

the class EvalModelProcessor method validateEvalColumnConfig.

/**
 * Checks that the columns needed for an eval run exist in the eval data set schema.
 *
 * <p>The schema comes from the header file when a header path is set. Otherwise the first data line is
 * read: if it contains the target column name it is treated as a header line, else the columns are
 * named by index ("0", "1", ...). With segment filter expressions configured, each column name is
 * additionally registered with a {@code _<segment index>} suffix. Nothing is validated when no column
 * config list is loaded, and the method returns early for the GENERIC and TENSORFLOW algorithms.
 *
 * @param evalConfig the eval configuration whose data set settings are validated
 * @throws IOException declared for the header/data/model loading helpers called here
 * @throws IllegalArgumentException when header and data lengths differ, or when the configured target
 *         or weight column is not part of the schema
 */
@SuppressWarnings("deprecation")
private void validateEvalColumnConfig(EvalConfig evalConfig) throws IOException {
    if (this.columnConfigList == null) {
        return;
    }
    boolean headerPathGiven = StringUtils.isNotBlank(evalConfig.getDataSet().getHeaderPath());
    // get header delimiter, falling back to the data delimiter when none is configured
    String delimiter;
    if (StringUtils.isBlank(evalConfig.getDataSet().getHeaderDelimiter())) {
        delimiter = evalConfig.getDataSet().getDataDelimiter();
    } else {
        delimiter = evalConfig.getDataSet().getHeaderDelimiter();
    }
    String[] evalColumnNames;
    if (headerPathGiven) {
        evalColumnNames = CommonUtils.getHeaders(evalConfig.getDataSet().getHeaderPath(), delimiter, evalConfig.getDataSet().getSource());
    } else {
        String[] firstLine = CommonUtils.takeFirstLine(evalConfig.getDataSet().getDataPath(), delimiter, evalConfig.getDataSet().getSource());
        // if first line contains target column name, we guess it is csv format and first line is header.
        String evalTargetColumnName;
        if (StringUtils.isBlank(evalConfig.getDataSet().getTargetColumnName())) {
            evalTargetColumnName = modelConfig.getTargetColumnName();
        } else {
            evalTargetColumnName = evalConfig.getDataSet().getTargetColumnName();
        }
        if (StringUtils.join(firstLine, "").contains(evalTargetColumnName)) {
            // first line of data meaning second line in data files excluding first header line
            String[] firstDataLine = CommonUtils.takeFirstTwoLines(evalConfig.getDataSet().getDataPath(), delimiter, evalConfig.getDataSet().getSource())[1];
            if (firstDataLine != null && firstLine.length != firstDataLine.length) {
                throw new IllegalArgumentException("Eval header length and eval data length are not consistent, please check you header setting and data set setting in eval.");
            }
            // normalize header names in place (special chars or / in a name are replaced in shifu)
            for (int pos = 0; pos < firstLine.length; pos++) {
                firstLine[pos] = CommonUtils.normColumnName(firstLine[pos]);
            }
            evalColumnNames = firstLine;
            LOG.warn("No header path is provided, we will try to read first line and detect schema.");
            LOG.warn("Schema in ColumnConfig.json are named as first line of data set path.");
        } else {
            LOG.warn("No header path is provided, we will try to read first line and detect schema.");
            LOG.warn("Schema in ColumnConfig.json are named as  index 0, 1, 2, 3 ...");
            LOG.warn("Please make sure weight column and tag column are also taking index as name.");
            evalColumnNames = new String[firstLine.length];
            for (int pos = 0; pos < firstLine.length; pos++) {
                evalColumnNames[pos] = String.valueOf(pos);
            }
        }
    }
    Set<NSColumn> names = new HashSet<NSColumn>();
    for (String columnName : evalColumnNames) {
        names.add(new NSColumn(columnName));
    }
    String filterExpressions = super.modelConfig.getSegmentFilterExpressionsAsString();
    if (StringUtils.isNotBlank(filterExpressions)) {
        int segFilterSize = CommonUtils.split(filterExpressions, Constants.SHIFU_STATS_FILTER_EXPRESSIONS_DELIMETER).length;
        // register the segment-expanded variants, suffixes start at 1
        for (int seg = 1; seg <= segFilterSize; seg++) {
            for (String columnName : evalColumnNames) {
                names.add(new NSColumn(columnName + "_" + seg));
            }
        }
    }
    boolean genericAlg = Constants.GENERIC.equalsIgnoreCase(modelConfig.getAlgorithm());
    if (genericAlg || Constants.TENSORFLOW.equalsIgnoreCase(modelConfig.getAlgorithm())) {
        // TODO correct this logic
        return;
    }
    List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelConfig, evalConfig, SourceType.LOCAL, evalConfig.getGbtConvertToProb(), evalConfig.getGbtScoreConvertStrategy());
    if (CollectionUtils.isNotEmpty(models)) {
        validateFinalColumns(evalConfig, this.modelConfig.getModelSetName(), false, this.columnConfigList, names);
    }
    // the target column may be given with or without namespace, so the simple name is accepted as well
    NSColumn targetColumn = new NSColumn(evalConfig.getDataSet().getTargetColumnName());
    if (StringUtils.isNotBlank(evalConfig.getDataSet().getTargetColumnName()) && !(names.contains(targetColumn) || names.contains(new NSColumn(targetColumn.getSimpleName())))) {
        throw new IllegalArgumentException("Target column " + evalConfig.getDataSet().getTargetColumnName() + " does not exist in - " + evalConfig.getDataSet().getHeaderPath());
    }
    // same rule for the weight column
    NSColumn weightColumn = new NSColumn(evalConfig.getDataSet().getWeightColumnName());
    if (StringUtils.isNotBlank(evalConfig.getDataSet().getWeightColumnName()) && !(names.contains(weightColumn) || names.contains(new NSColumn(weightColumn.getSimpleName())))) {
        throw new IllegalArgumentException("Weight column " + evalConfig.getDataSet().getWeightColumnName() + " does not exist in - " + evalConfig.getDataSet().getHeaderPath());
    }
    List<ModelSpec> subModels = ModelSpecLoaderUtils.loadSubModels(modelConfig, this.columnConfigList, evalConfig, SourceType.LOCAL, evalConfig.getGbtConvertToProb(), evalConfig.getGbtScoreConvertStrategy());
    if (CollectionUtils.isNotEmpty(subModels)) {
        for (ModelSpec subModel : subModels) {
            validateFinalColumns(evalConfig, subModel.getModelName(), true, subModel.getColumnConfigList(), names);
        }
    }
}
Also used : BasicML(org.encog.ml.BasicML) ModelSpec(ml.shifu.shifu.core.model.ModelSpec) NSColumn(ml.shifu.shifu.column.NSColumn) HashSet(java.util.HashSet)

Example 5 with NSColumn

use of ml.shifu.shifu.column.NSColumn in project shifu by ShifuML.

the class EvalScoreUDF method exec.

/**
 * Scores one input record and returns a tuple of tag, weight, model scores and meta columns.
 *
 * <p>The model runner is created lazily on the first call. In csv mode a record whose first field
 * equals the first header name is treated as the header line and skipped.
 *
 * @param input the raw record
 * @return the score tuple, or {@code null} when the record is a csv header line, cannot be converted
 *         into a column map, or produces no score result
 * @throws IOException declared for tuple access and model loading
 */
@SuppressWarnings("deprecation")
public Tuple exec(Tuple input) throws IOException {
    if (isCsvFormat) {
        Object firstField = input.get(0);
        String firstCol = (firstField == null) ? "" : firstField.toString();
        if (this.headers[0].equals(CommonUtils.normColumnName(firstCol))) {
            // TODO what to do if the column value == column name? ...
            return null;
        }
    }
    long start = System.currentTimeMillis();
    if (this.modelRunner == null) {
        // here to initialize modelRunner, this is moved from constructor to here to avoid OOM in client side.
        // UDF in pig client will be initialized to get some metadata issues
        List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelConfig, evalConfig, evalConfig.getDataSet().getSource(), evalConfig.getGbtConvertToProb(), evalConfig.getGbtScoreConvertStrategy());
        this.modelRunner = new ModelRunner(modelConfig, columnConfigList, this.headers, evalConfig.getDataSet().getDataDelimiter(), models, this.outputHiddenLayerIndex, this.isMultiThreadScoring);
        List<ModelSpec> subModels = ModelSpecLoaderUtils.loadSubModels(modelConfig, this.columnConfigList, evalConfig, evalConfig.getDataSet().getSource(), evalConfig.getGbtConvertToProb(), evalConfig.getGbtScoreConvertStrategy());
        if (CollectionUtils.isNotEmpty(subModels)) {
            for (ModelSpec subModel : subModels) {
                this.modelRunner.addSubModels(subModel, this.isMultiThreadScoring);
                this.subModelsCnt.put(subModel.getModelName(), subModel.getModels().size());
            }
        }
        this.modelCnt = models.size();
        // reset models in classification case
        if (modelConfig.isClassification()) {
            boolean oneVsAll = modelConfig.getTrain().isOneVsAll();
            if (modelConfig.getTags().size() == 2) {
                // binary case (one-vs-all or native): a single model
                this.modelCnt = 1;
            } else if (oneVsAll) {
                this.modelCnt = modelConfig.getTags().size();
            } else if (this.modelCnt >= modelConfig.getBaggingNum()) {
                // native multiple classification model cnt is bagging num
                this.modelCnt = modelConfig.getBaggingNum();
            }
            // reset models to the first modelCnt ones
            models = models.subList(0, this.modelCnt);
            // NOTE(review): this replaces the runner the sub models were added to above - confirm
            // that dropping them in the classification case is intended.
            this.modelRunner = new ModelRunner(modelConfig, columnConfigList, this.headers, evalConfig.getDataSet().getDataDelimiter(), models, this.outputHiddenLayerIndex, this.isMultiThreadScoring);
        }
        this.modelRunner.setScoreScale(Integer.parseInt(this.scale));
        log.info("DEBUG: model cnt " + this.modelCnt + " sub models cnt " + modelRunner.getSubModelsCnt());
    }
    Map<NSColumn, String> rawDataNsMap = CommonUtils.convertDataIntoNsMap(input, this.headers, this.segFilterSize);
    if (MapUtils.isEmpty(rawDataNsMap)) {
        return null;
    }
    NSColumn targetColumn = new NSColumn(modelConfig.getTargetColumnName(evalConfig));
    String tag = CommonUtils.trimTag(rawDataNsMap.get(targetColumn));
    // Invalid-tag filtering is intentionally disabled: there may be no valid tag in the eval data set
    // when the user only wants to score the data without running performance evaluation.
    long startTime = System.nanoTime();
    CaseScoreResult cs = modelRunner.computeNsData(rawDataNsMap);
    long runInterval = (System.nanoTime() - startTime) / 1000L;
    if (cs == null) {
        // sampled logging to avoid flooding the log
        if (System.currentTimeMillis() % 100 == 0) {
            log.warn("Get null result, for input: " + input.toDelimitedString("|"));
        }
        return null;
    }
    Tuple tuple = TupleFactory.getInstance().newTuple();
    tuple.append(tag);
    String weight = "1.0";
    if (StringUtils.isNotBlank(evalConfig.getDataSet().getWeightColumnName())) {
        weight = rawDataNsMap.get(new NSColumn(evalConfig.getDataSet().getWeightColumnName()));
    }
    incrementTagCounters(tag, weight, runInterval);
    Map<String, CaseScoreResult> subModelScores = cs.getSubModelScores();
    tuple.append(weight);
    boolean hasMainScores;
    if (this.isLinearTarget || modelConfig.isRegression()) {
        hasMainScores = CollectionUtils.isNotEmpty(cs.getScores());
        if (hasMainScores) {
            appendModelScore(tuple, cs, true);
            if (this.outputHiddenLayerIndex != 0) {
                appendFirstHiddenOutputScore(tuple, cs.getHiddenLayerScores(), true);
            }
        }
        if (MapUtils.isNotEmpty(subModelScores)) {
            for (CaseScoreResult subCs : subModelScores.values()) {
                appendModelScore(tuple, subCs, false);
            }
        }
    } else {
        hasMainScores = CollectionUtils.isNotEmpty(cs.getScores());
        if (hasMainScores) {
            appendSimpleScore(tuple, cs);
            tuple.append(this.mcPredictor.predictTag(cs).getTag());
        }
        if (MapUtils.isNotEmpty(subModelScores)) {
            for (CaseScoreResult subCs : subModelScores.values()) {
                appendSimpleScore(tuple, subCs);
            }
        }
    }
    // append meta data
    List<String> metaColumns = evalConfig.getAllMetaColumns(modelConfig);
    if (CollectionUtils.isNotEmpty(metaColumns)) {
        for (String metaName : metaColumns) {
            tuple.append(rawDataNsMap.get(new NSColumn(metaName)));
        }
    }
    // sampled timing log
    if (System.currentTimeMillis() % 1000 == 0L) {
        log.info("running time is " + (System.currentTimeMillis() - start) + " ms.");
    }
    return tuple;
}
Also used : BasicML(org.encog.ml.BasicML) CaseScoreResult(ml.shifu.shifu.container.CaseScoreResult) Entry(java.util.Map.Entry) ModelSpec(ml.shifu.shifu.core.model.ModelSpec) Map(java.util.Map) SortedMap(java.util.SortedMap) Tuple(org.apache.pig.data.Tuple) ModelRunner(ml.shifu.shifu.core.ModelRunner) NSColumn(ml.shifu.shifu.column.NSColumn)

Aggregations

NSColumn (ml.shifu.shifu.column.NSColumn): 17, ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig): 8, CaseScoreResult (ml.shifu.shifu.container.CaseScoreResult): 4, Tuple (org.apache.pig.data.Tuple): 4, BasicML (org.encog.ml.BasicML): 3, ModelRunner (ml.shifu.shifu.core.ModelRunner): 2, ModelSpec (ml.shifu.shifu.core.model.ModelSpec): 2, BasicMLData (org.encog.ml.data.basic.BasicMLData): 2, BasicMLDataPair (org.encog.ml.data.basic.BasicMLDataPair): 2, IOException (java.io.IOException): 1, HashSet (java.util.HashSet): 1, Map (java.util.Map): 1, Entry (java.util.Map.Entry): 1, SortedMap (java.util.SortedMap): 1, ColumnConfigComparator (ml.shifu.shifu.container.obj.ColumnConfig.ColumnConfigComparator): 1, NormType (ml.shifu.shifu.container.obj.ModelNormalizeConf.NormType): 1, ShifuException (ml.shifu.shifu.exception.ShifuException): 1, FileStatus (org.apache.hadoop.fs.FileStatus): 1, Path (org.apache.hadoop.fs.Path): 1