Search in sources :

Example 11 with ColumnConfig

use of ml.shifu.shifu.container.obj.ColumnConfig in project shifu by ShifuML.

the class DTrainUtils method getInputOutputCandidateCounts.

/**
 * Count the input nodes, the output nodes and the good candidate columns described by a column config list.
 *
 * <p>
 * Only final-selected columns which are neither target nor meta contribute to the input count. The number of
 * input nodes taken by such a column depends on the normalization type:
 * <ul>
 * <li>{@code ONEHOT}: bin category size + 1 for a categorical column, bin boundary size + 1 otherwise;</li>
 * <li>{@code ZSCALE_ONEHOT}: bin category size + 1 for a categorical column, 1 otherwise;</li>
 * <li>any other type (including {@code null}): 1.</li>
 * </ul>
 *
 * <p>
 * Every target column counts as one output node. The candidate count is the number of non target, non meta
 * columns accepted by {@code CommonUtils.isGoodCandidate}.
 *
 * <p>
 * This method does not replace the input count by the candidate count when no column is final-selected: the
 * returned input count is then 0 and it is up to the caller to fall back to the candidate count.
 *
 * @param normType
 *            normalization type
 * @param columnConfigList
 *            the column config list
 * @return [input, output, candidate]
 * @throws NullPointerException
 *             if columnConfigList or ColumnConfig object in columnConfigList is null.
 */
public static int[] getInputOutputCandidateCounts(ModelNormalizeConf.NormType normType, List<ColumnConfig> columnConfigList) {
    int input = 0, output = 0, goodCandidate = 0;
    boolean hasCandidate = CommonUtils.hasCandidateColumns(columnConfigList);
    // the norm type does not change while iterating, so evaluate it once; == is the idiomatic comparison for
    // enum constants and does not fail on a null normType
    boolean isOneHot = normType == ModelNormalizeConf.NormType.ONEHOT;
    boolean isZscaleOneHot = normType == ModelNormalizeConf.NormType.ZSCALE_ONEHOT;
    for (ColumnConfig config : columnConfigList) {
        if (config.isTarget()) {
            // a target column is only an output node, never a candidate or an input
            output += 1;
            continue;
        }
        if (config.isMeta()) {
            // meta columns are neither candidates nor inputs
            continue;
        }
        if (CommonUtils.isGoodCandidate(config, hasCandidate)) {
            goodCandidate += 1;
        }
        if (!config.isFinalSelect()) {
            continue;
        }
        if (isOneHot) {
            input += (config.isCategorical() ? config.getBinCategory().size() : config.getBinBoundary().size()) + 1;
        } else if (isZscaleOneHot && config.isCategorical()) {
            input += config.getBinCategory().size() + 1;
        } else {
            input += 1;
        }
    }
    return new int[] { input, output, goodCandidate };
}
Also used : ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig)

Example 12 with ColumnConfig

use of ml.shifu.shifu.container.obj.ColumnConfig in project shifu by ShifuML.

the class StatsModelProcessor method run.

/**
 * Runner for the statistics step.
 *
 * <p>
 * Exactly one of four modes is executed, selected from {@code this.params}: correlation computing, PSI
 * computing, re-binning of existing bins, or (when none of the three flags is set) the regular stats job. After
 * the selected mode finished, the current column config is backed up locally and synced to the data source.
 *
 * @return 0 if the step finishes; -1 if the mean-value precondition of the correlation/PSI mode is not met or if
 *         any exception is caught (the exception is logged, not rethrown)
 * @throws Exception
 *             declared by the signature; exceptions raised inside the try block are caught and turned into -1
 */
@Override
public int run() throws Exception {
    log.info("Step Start: stats");
    long start = System.currentTimeMillis();
    try {
        // 0. set up and sync to HDFS
        setUp(ModelStep.STATS);
        // resync ModelConfig.json/ColumnConfig.json to HDFS
        syncDataToHdfs(modelConfig.getDataSet().getSource());
        if (getBooleanParam(this.params, Constants.IS_COMPUTE_CORR)) {
            // 1. validate if run stats before run stats -correlation
            boolean foundValidMeanValueColumn = isMeanCalculated();
            if (!foundValidMeanValueColumn) {
                log.warn("Some mean value of column is null, could you check if you run 'shifu stats'.");
                // NOTE: returns from inside the try block, so the backup/sync/clearUp calls below are skipped
                return -1;
            }
            // 2. compute correlation
            log.info("Start computing correlation value ...");
            SourceType source = this.modelConfig.getDataSet().getSource();
            String corrPath = super.getPathFinder().getCorrelationPath(source);
            // check if can start from existing output
            boolean reuseCorrResult = Environment.getBoolean("shifu.stats.corr.reuse", Boolean.FALSE);
            // NOTE(review): existence is checked against SourceType.HDFS although corrPath was resolved for 'source'
            // - confirm this is intended when the data source is not HDFS
            if (reuseCorrResult && ShifuFileUtils.isFileExists(corrPath, SourceType.HDFS)) {
                dumpAndCalculateCorrelationResult(source, corrPath);
            } else {
                runCorrMapReduceJob();
            }
            // 3. save column config list
            saveColumnConfigList();
        } else if (getBooleanParam(this.params, Constants.IS_COMPUTE_PSI)) {
            // PSI has the same precondition as correlation: 'shifu stats' must have produced mean values
            boolean foundValidMeanValueColumn = isMeanCalculated();
            if (!foundValidMeanValueColumn) {
                log.warn("Some mean value of column is null, could you check if you run 'shifu stats'.");
                return -1;
            }
            if (StringUtils.isNotEmpty(modelConfig.getPsiColumnName())) {
                new MapReducerStatsWorker(this, modelConfig, columnConfigList).runPSI();
                // save column config list after running PSI successfully
                saveColumnConfigList();
            } else {
                // a missing PSI column is only warned about; the step still continues and returns 0
                log.warn("To Run PSI please set your PSI column in dataSet::psiColumnName.");
            }
        } else if (getBooleanParam(this.params, Constants.IS_REBIN)) {
            // run the re-binning
            String backupColumnConfigPath = this.pathFinder.getBackupColumnConfig();
            if (!ShifuFileUtils.isFileExists(new Path(backupColumnConfigPath), SourceType.LOCAL)) {
                // no backup yet: the current column config becomes the backup
                ShifuFileUtils.createDirIfNotExists(new SourceFile(Constants.TMP, SourceType.LOCAL));
                saveColumnConfigList(backupColumnConfigPath, this.columnConfigList);
            } else {
                // existing backup ColumnConfig.json, use binning info in it to do rebin
                List<ColumnConfig> backColumnConfigList = CommonUtils.loadColumnConfigList(backupColumnConfigPath, SourceType.LOCAL, false);
                // columns are matched by name; every current column with the same name gets the backup binning
                for (ColumnConfig backupColumnConfig : backColumnConfigList) {
                    for (ColumnConfig columnConfig : this.columnConfigList) {
                        if (NSColumnUtils.isColumnEqual(backupColumnConfig.getColumnName(), columnConfig.getColumnName())) {
                            columnConfig.setColumnBinning(backupColumnConfig.getColumnBinning());
                        }
                    }
                }
            }
            // user provide candidate variable list or not
            boolean hasCandidates = CommonUtils.hasCandidateColumns(this.columnConfigList);
            List<ColumnConfig> rebinColumns = new ArrayList<ColumnConfig>();
            // requested variables, comma separated; an empty list means every column is considered
            List<String> catVariables = getStringList(this.params, Constants.REQUEST_VARS, ",");
            for (ColumnConfig columnConfig : this.columnConfigList) {
                if (CollectionUtils.isEmpty(catVariables) || isRequestColumn(catVariables, columnConfig)) {
                    if (CommonUtils.isGoodCandidate(columnConfig, hasCandidates)) {
                        rebinColumns.add(columnConfig);
                    } else {
                        log.warn("Column - {} is not a good candidate. Skip it.", columnConfig.getColumnName());
                    }
                }
            }
            if (CollectionUtils.isNotEmpty(rebinColumns)) {
                for (ColumnConfig columnConfig : rebinColumns) {
                    doReBin(columnConfig);
                }
            }
            // use the merge ColumnConfig.json to replace current one
            saveColumnConfigList();
        } else {
            // regular stats: pick the executor from the run mode and, for map-reduce, the binning algorithm
            AbstractStatsExecutor statsExecutor = null;
            if (modelConfig.isMapReduceRunMode()) {
                if (modelConfig.getBinningAlgorithm().equals(ModelStatsConf.BinningAlgorithm.DynamicBinning)) {
                    statsExecutor = new DIBStatsExecutor(this, modelConfig, columnConfigList);
                } else if (modelConfig.getBinningAlgorithm().equals(ModelStatsConf.BinningAlgorithm.MunroPat)) {
                    statsExecutor = new MunroPatStatsExecutor(this, modelConfig, columnConfigList);
                } else if (modelConfig.getBinningAlgorithm().equals(ModelStatsConf.BinningAlgorithm.MunroPatI)) {
                    statsExecutor = new MunroPatIStatsExecutor(this, modelConfig, columnConfigList);
                } else if (modelConfig.getBinningAlgorithm().equals(ModelStatsConf.BinningAlgorithm.SPDT)) {
                    statsExecutor = new SPDTStatsExecutor(this, modelConfig, columnConfigList);
                } else if (modelConfig.getBinningAlgorithm().equals(ModelStatsConf.BinningAlgorithm.SPDTI)) {
                    statsExecutor = new SPDTIStatsExecutor(this, modelConfig, columnConfigList);
                } else {
                    // SPDTI executor is also the fallback for any other binning algorithm
                    statsExecutor = new SPDTIStatsExecutor(this, modelConfig, columnConfigList);
                }
            } else if (modelConfig.isLocalRunMode()) {
                statsExecutor = new AkkaStatsWorker(this, modelConfig, columnConfigList);
            } else {
                throw new ShifuException(ShifuErrorCode.ERROR_UNSUPPORT_MODE);
            }
            statsExecutor.doStats();
            // update the backup ColumnConfig.json after running stats
            String backupColumnConfigPath = this.pathFinder.getBackupColumnConfig();
            ShifuFileUtils.createDirIfNotExists(new SourceFile(Constants.TMP, SourceType.LOCAL));
            saveColumnConfigList(backupColumnConfigPath, this.columnConfigList);
        }
        // back up current column config each time as stats will always change CC.json
        this.backupCurrentColumnConfigToLocal(SDF.format(new Date()));
        syncDataToHdfs(modelConfig.getDataSet().getSource());
        clearUp(ModelStep.STATS);
    } catch (ShifuException e) {
        // ShifuException carries an error code which is logged in addition to the message
        log.error("Error:" + e.getError().toString() + "; msg:" + e.getMessage(), e);
        return -1;
    } catch (Exception e) {
        log.error("Error:" + e.getMessage(), e);
        return -1;
    }
    log.info("Step Finished: stats with {} ms", (System.currentTimeMillis() - start));
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) MapReducerStatsWorker(ml.shifu.shifu.core.processor.stats.MapReducerStatsWorker) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) ArrayList(java.util.ArrayList) AbstractStatsExecutor(ml.shifu.shifu.core.processor.stats.AbstractStatsExecutor) AkkaStatsWorker(ml.shifu.shifu.core.processor.stats.AkkaStatsWorker) Date(java.util.Date) ShifuException(ml.shifu.shifu.exception.ShifuException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) JexlException(org.apache.commons.jexl2.JexlException) IOException(java.io.IOException) DIBStatsExecutor(ml.shifu.shifu.core.processor.stats.DIBStatsExecutor) MunroPatIStatsExecutor(ml.shifu.shifu.core.processor.stats.MunroPatIStatsExecutor) SPDTIStatsExecutor(ml.shifu.shifu.core.processor.stats.SPDTIStatsExecutor) SPDTStatsExecutor(ml.shifu.shifu.core.processor.stats.SPDTStatsExecutor) MunroPatStatsExecutor(ml.shifu.shifu.core.processor.stats.MunroPatStatsExecutor) SourceFile(ml.shifu.shifu.fs.SourceFile) ShifuException(ml.shifu.shifu.exception.ShifuException)

Example 13 with ColumnConfig

use of ml.shifu.shifu.container.obj.ColumnConfig in project shifu by ShifuML.

the class VarSelectModelProcessor method persistColumnIds.

/**
 * Reads the selected column ids from the files under {@code path} and marks exactly those columns (plus the force
 * selected ones) as final-selected, then saves the column config list.
 *
 * <p>
 * Each line is expected to look like {@code <size>|<ids>}; when several lines are present the ids of the last
 * line read win. If no line is found at all, nothing is changed and -1 is returned.
 *
 * @param path
 *            location of the variable selection output to read
 * @return 0 on success, -1 if the input cannot be read, is empty or is malformed
 */
private int persistColumnIds(Path path) {
    List<Scanner> scanners = null;
    try {
        scanners = ShifuFileUtils.getDataScanners(path.toString(), modelConfig.getDataSet().getSource());
        List<Integer> ids = null;
        for (Scanner scanner : scanners) {
            while (scanner.hasNextLine()) {
                String[] raw = scanner.nextLine().trim().split("\\|");
                if (raw.length < 2) {
                    throw new IllegalArgumentException("Malformed selected column line, expected '<size>|<ids>' in " + path);
                }
                // the size field itself is not needed, it is only parsed to reject lines with a non numeric prefix
                Integer.parseInt(raw[0]);
                ids = CommonUtils.stringToIntegerList(raw[1]);
            }
        }
        if (ids == null) {
            // fail before touching any flag, otherwise all non force-selected columns would be de-selected
            log.error("No selected column ids found in {}, column config is not changed.", path);
            return -1;
        }
        // prevent multiply running setting
        for (ColumnConfig config : columnConfigList) {
            if (!config.isForceSelect()) {
                config.setFinalSelect(Boolean.FALSE);
            }
        }
        for (Integer id : ids) {
            this.columnConfigList.get(id).setFinalSelect(Boolean.TRUE);
        }
        super.saveColumnConfigList();
    } catch (IOException e) {
        log.error("Error:" + e.getMessage(), e);
        return -1;
    } catch (IllegalArgumentException e) {
        log.error("Error:" + e.getMessage(), e);
        return -1;
    } finally {
        // release the underlying streams whatever the outcome
        if (scanners != null) {
            for (Scanner scanner : scanners) {
                if (scanner != null) {
                    scanner.close();
                }
            }
        }
    }
    return 0;
}
Also used : ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) IOException(java.io.IOException)

Example 14 with ColumnConfig

use of ml.shifu.shifu.container.obj.ColumnConfig in project shifu by ShifuML.

the class VarSelectModelProcessor method run.

/**
 * Run for the variable selection.
 *
 * <p>
 * The first matching mode is executed: reset all selections, list the selected variables, run the auto filter,
 * recover the auto filter result from history, or (default) the actual variable selection. The default mode
 * dispatches on {@code modelConfig.isRegression()} and, for regression, on the configured filter-by strategy.
 *
 * @return 0 if the step finishes, -1 if any exception is caught (the exception is logged, not rethrown)
 * @throws Exception
 *             declared by the signature; exceptions raised inside the try block are caught and turned into -1
 */
@Override
public int run() throws Exception {
    log.info("Step Start: varselect");
    long start = System.currentTimeMillis();
    try {
        setUp(ModelStep.VARSELECT);
        validateParameters();
        // reset all selections if user specify or select by absolute number
        if (getIsToReset()) {
            log.info("Reset all selections data including type final select etc!");
            resetAllFinalSelect();
        } else if (getIsToList()) {
            // read-only mode: just print the names of the final-selected columns
            log.info("Below variables are selected - ");
            for (ColumnConfig columnConfig : this.columnConfigList) {
                if (columnConfig.isFinalSelect()) {
                    log.info(columnConfig.getColumnName());
                }
            }
            log.info("-----  Done -----");
        } else if (getIsToAutoFilter()) {
            log.info("Start to run variable auto filter.");
            runAutoVarFilter();
            log.info("-----  Done -----");
        } else if (getIsRecoverAuto()) {
            String varselHistory = pathFinder.getVarSelHistory();
            if (ShifuFileUtils.isFileExists(varselHistory, SourceType.LOCAL)) {
                log.info("!!! Auto filtered variables will be recovered from history.");
                recoverVarselStatusFromHist(varselHistory);
                log.info("-----  Done -----");
            } else {
                log.warn("No variables auto filter history is found.");
            }
        } else {
            // sync to make sure load from hdfs config is consistent with local configuration
            syncDataToHdfs(super.modelConfig.getDataSet().getSource());
            String filterExpressions = super.modelConfig.getSegmentFilterExpressionsAsString();
            Environment.getProperties().put("shifu.segment.expressions", filterExpressions);
            if (StringUtils.isNotBlank(filterExpressions)) {
                String[] splits = CommonUtils.split(filterExpressions, Constants.SHIFU_STATS_FILTER_EXPRESSIONS_DELIMETER);
                // find the first target column and force-remove the column at the same offset in every other segment
                for (int i = 0; i < super.columnConfigList.size(); i++) {
                    ColumnConfig config = super.columnConfigList.get(i);
                    // assumes columnConfigList holds (1 + splits.length) equally sized segments, presumably the raw
                    // columns followed by one copy per filter expression - TODO confirm
                    int rawSize = super.columnConfigList.size() / (1 + splits.length);
                    if (config.isTarget()) {
                        for (int j = 0; j < splits.length; j++) {
                            ColumnConfig otherConfig = super.columnConfigList.get((j + 1) * rawSize + i);
                            otherConfig.setColumnFlag(ColumnFlag.ForceRemove);
                            otherConfig.setFinalSelect(false);
                        }
                        // only the first target column found is handled
                        break;
                    }
                }
                this.saveColumnConfigList();
                // sync to make sure load from hdfs config is consistent with local configuration
                syncDataToHdfs(super.modelConfig.getDataSet().getSource());
            }
            if (modelConfig.isRegression()) {
                String filterBy = this.modelConfig.getVarSelectFilterBy();
                if (filterBy.equalsIgnoreCase(Constants.FILTER_BY_KS) || filterBy.equalsIgnoreCase(Constants.FILTER_BY_IV) || filterBy.equalsIgnoreCase(Constants.FILTER_BY_PARETO) || filterBy.equalsIgnoreCase(Constants.FILTER_BY_MIX)) {
                    // KS / IV / PARETO / MIX are all delegated to VariableSelector
                    VariableSelector selector = new VariableSelector(this.modelConfig, this.columnConfigList);
                    this.columnConfigList = selector.selectByFilter();
                } else if (filterBy.equalsIgnoreCase(Constants.FILTER_BY_FI)) {
                    if (!CommonUtils.isTreeModel(modelConfig.getAlgorithm())) {
                        throw new IllegalArgumentException("Filter by FI only works well in GBT/RF. Please check your modelconfig::train.");
                    }
                    selectByFeatureImportance();
                } else if (filterBy.equalsIgnoreCase(Constants.FILTER_BY_SE) || filterBy.equalsIgnoreCase(Constants.FILTER_BY_ST)) {
                    if (!Constants.NN.equalsIgnoreCase(modelConfig.getAlgorithm()) && !Constants.LR.equalsIgnoreCase(modelConfig.getAlgorithm())) {
                        throw new IllegalArgumentException("Filter by SE/ST only works well in NN/LR. Please check your modelconfig::train.");
                    }
                    int recursiveCnt = getRecursiveCnt();
                    int i = 0;
                    // create varsel directory and write original copy of ColumnConfig.json
                    ShifuFileUtils.createDirIfNotExists(pathFinder.getVarSelDir(), SourceType.LOCAL);
                    super.saveColumnConfigList(pathFinder.getVarSelColumnConfig(i), this.columnConfigList);
                    // i is already incremented inside the loop body: round k uses (i - 1) == k - 1 for the log and
                    // MSE history names and i == k for the column config backup
                    while ((i++) < recursiveCnt) {
                        String trainLogFile = TRAIN_LOG_PREFIX + "-" + (i - 1) + ".log";
                        distributedSEWrapper(trainLogFile);
                        // copy training log to SE train.log
                        ShifuFileUtils.move(trainLogFile, new File(pathFinder.getVarSelDir(), trainLogFile).getPath(), SourceType.LOCAL);
                        String varSelectMSEOutputPath = pathFinder.getVarSelectMSEOutputPath(modelConfig.getDataSet().getSource());
                        // even fail to run SE, still to create an empty se.x file
                        String varSelMSEHistPath = pathFinder.getVarSelMSEHistPath(i - 1);
                        ShifuFileUtils.createFileIfNotExists(varSelMSEHistPath, SourceType.LOCAL);
                        ShifuFileUtils.copyToLocal(new SourceFile(varSelectMSEOutputPath, modelConfig.getDataSet().getSource()), Constants.SHIFU_VARSELECT_SE_OUTPUT_NAME, varSelMSEHistPath);
                        // save as backup
                        super.saveColumnConfigList(pathFinder.getVarSelColumnConfig(i), this.columnConfigList);
                        // save as current copy
                        super.saveColumnConfigList();
                    }
                } else if (filterBy.equalsIgnoreCase(Constants.FILTER_BY_VOTED)) {
                    votedVariablesSelection();
                }
                // any other filterBy value falls through without selecting anything
            } else {
                // non-regression (multi-classification, per the log message below) branch
                boolean hasCandidates = CommonUtils.hasCandidateColumns(this.columnConfigList);
                if (this.modelConfig.getVarSelect().getForceEnable() && CollectionUtils.isNotEmpty(this.modelConfig.getListForceSelect())) {
                    log.info("Force Selection is enabled ... " + "for multi-classification, currently only use it to selected variables.");
                    for (ColumnConfig config : this.columnConfigList) {
                        if (config.isForceSelect()) {
                            // a force selected column is selected even if it is not a good candidate, only a warning is logged
                            if (!CommonUtils.isGoodCandidate(config, hasCandidates, modelConfig.isRegression())) {
                                log.warn("!! Variable - {} is not a good candidate. But it is in forceselect list", config.getColumnName());
                            }
                            config.setFinalSelect(true);
                        }
                    }
                    log.info("{} variables are selected by force.", this.modelConfig.getListForceSelect().size());
                } else {
                    // multiple classification, select all candidate at first, TODO add SE for multi-classification
                    for (ColumnConfig config : this.columnConfigList) {
                        if (CommonUtils.isGoodCandidate(config, hasCandidates, modelConfig.isRegression())) {
                            config.setFinalSelect(true);
                        }
                    }
                }
            }
            // clean shadow targets for multi-segments
            cleanShadowTargetsForSegments();
            if (modelConfig.getVarSelect().getAutoFilterEnable()) {
                runAutoVarFilter();
            }
        }
        // save column config to file and sync to
        clearUp(ModelStep.VARSELECT);
    } catch (ShifuException e) {
        // ShifuException carries an error code which is logged in addition to the message
        log.error("Error:" + e.getError().toString() + "; msg:" + e.getMessage(), e);
        return -1;
    } catch (Exception e) {
        log.error("Error:" + e.getMessage(), e);
        return -1;
    }
    log.info("Step Finished: varselect with {} ms", (System.currentTimeMillis() - start));
    return 0;
}
Also used : ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) VariableSelector(ml.shifu.shifu.core.VariableSelector) SourceFile(ml.shifu.shifu.fs.SourceFile) SourceFile(ml.shifu.shifu.fs.SourceFile) File(java.io.File) ShifuException(ml.shifu.shifu.exception.ShifuException) ShifuException(ml.shifu.shifu.exception.ShifuException) JexlException(org.apache.commons.jexl2.JexlException) IOException(java.io.IOException)

Example 15 with ColumnConfig

use of ml.shifu.shifu.container.obj.ColumnConfig in project shifu by ShifuML.

the class VarSelectModelProcessor method prepareVarSelParams.

/**
 * Appends to {@code args} the command line arguments of the distributed variable selection job: runtime jars,
 * input path, optional zookeeper servers, worker/master classes, iteration count, result classes and the
 * map-reduce properties (conductors, queue, timeout, config paths, source type, computation time threshold).
 * Finally every key/value visited by {@code CommonUtils.injectHadoopShifuEnvironments} is appended as well.
 *
 * @param args
 *            the argument list to fill; it is modified in place
 * @param sourceType
 *            source type used to resolve the file system and the config paths
 */
@SuppressWarnings("unused")
private void prepareVarSelParams(final List<String> args, final SourceType sourceType) {
    args.add("-libjars");
    args.add(addRuntimeJars());
    args.add("-i");
    args.add(ShifuFileUtils.getFileSystemBySourceType(sourceType).makeQualified(new Path(modelConfig.getDataSetRawPath())).toString());
    String zkServers = Environment.getProperty(Environment.ZOO_KEEPER_SERVERS);
    if (StringUtils.isEmpty(zkServers)) {
        log.warn("No specified zookeeper settings from zookeeperServers in shifuConfig file, Guagua will set embeded zookeeper server in client process. For big data applications, specified zookeeper servers are strongly recommended.");
    } else {
        args.add("-z");
        args.add(zkServers);
    }
    // setting the class
    args.add("-w");
    args.add(VarSelWorker.class.getName());
    args.add("-m");
    args.add(VarSelMaster.class.getName());
    args.add("-c");
    // the reason to add 1 is that the first iteration in D-NN implementation is used for training preparation.
    // FIXME, how to set iteration number
    // NOTE: unboxing fails with a NullPointerException if POPULATION_MULTIPLY_CNT is not set in the varselect params
    int iterationCnt = (Integer) this.modelConfig.getVarSelect().getParams().get(CandidateGenerator.POPULATION_MULTIPLY_CNT) + 1;
    args.add(Integer.toString(iterationCnt));
    args.add("-mr");
    args.add(VarSelMasterResult.class.getName());
    args.add("-wr");
    args.add(VarSelWorkerResult.class.getName());
    // setting conductor
    args.add(String.format(CommonConstants.MAPREDUCE_PARAM_FORMAT, ml.shifu.shifu.util.Constants.VAR_SEL_MASTER_CONDUCTOR, Environment.getProperty(Environment.VAR_SEL_MASTER_CONDUCTOR, WrapperMasterConductor.class.getName())));
    // NOTE(review): the worker conductor is looked up with the MASTER conductor environment key, which looks like a
    // copy-paste slip; kept as is because the matching worker key of Environment is not visible here - confirm
    args.add(String.format(CommonConstants.MAPREDUCE_PARAM_FORMAT, ml.shifu.shifu.util.Constants.VAR_SEL_WORKER_CONDUCTOR, Environment.getProperty(Environment.VAR_SEL_MASTER_CONDUCTOR, WrapperWorkerConductor.class.getName())));
    // setting queue
    args.add(String.format(CommonConstants.MAPREDUCE_PARAM_FORMAT, NNConstants.MAPRED_JOB_QUEUE_NAME, Environment.getProperty(Environment.HADOOP_JOB_QUEUE, ml.shifu.shifu.util.Constants.DEFAULT_JOB_QUEUE)));
    // MAPRED timeout
    args.add(String.format(CommonConstants.MAPREDUCE_PARAM_FORMAT, NNConstants.MAPRED_TASK_TIMEOUT, Environment.getInt(NNConstants.MAPRED_TASK_TIMEOUT, ml.shifu.shifu.util.Constants.DEFAULT_MAPRED_TIME_OUT)));
    args.add(String.format(CommonConstants.MAPREDUCE_PARAM_FORMAT, GuaguaConstants.GUAGUA_MASTER_INTERCEPTERS, VarSelOutput.class.getName()));
    // setting model config column config
    args.add(String.format(CommonConstants.MAPREDUCE_PARAM_FORMAT, CommonConstants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(sourceType).makeQualified(new Path(super.getPathFinder().getModelConfigPath(sourceType)))));
    args.add(String.format(CommonConstants.MAPREDUCE_PARAM_FORMAT, CommonConstants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(sourceType).makeQualified(new Path(super.getPathFinder().getColumnConfigPath(sourceType)))));
    // source type
    args.add(String.format(CommonConstants.MAPREDUCE_PARAM_FORMAT, CommonConstants.MODELSET_SOURCE_TYPE, sourceType));
    // computation time: one hour in milliseconds
    args.add(String.format(CommonConstants.MAPREDUCE_PARAM_FORMAT, GuaguaConstants.GUAGUA_COMPUTATION_TIME_THRESHOLD, 60 * 60 * 1000L));
    setHeapSizeAndSplitSize(args);
    // one can set guagua conf in shifuconfig
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {

        @Override
        public void inject(Object key, Object value) {
            args.add(String.format(CommonConstants.MAPREDUCE_PARAM_FORMAT, key.toString(), value.toString()));
        }
    });
}
Also used : Path(org.apache.hadoop.fs.Path) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) WrapperWorkerConductor(ml.shifu.shifu.core.dvarsel.wrapper.WrapperWorkerConductor) WrapperMasterConductor(ml.shifu.shifu.core.dvarsel.wrapper.WrapperMasterConductor)

Aggregations

ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig)131 ArrayList (java.util.ArrayList)36 Test (org.testng.annotations.Test)17 IOException (java.io.IOException)16 HashMap (java.util.HashMap)12 Tuple (org.apache.pig.data.Tuple)10 File (java.io.File)8 NSColumn (ml.shifu.shifu.column.NSColumn)8 ModelConfig (ml.shifu.shifu.container.obj.ModelConfig)8 ShifuException (ml.shifu.shifu.exception.ShifuException)8 Path (org.apache.hadoop.fs.Path)8 List (java.util.List)7 Scanner (java.util.Scanner)7 DataBag (org.apache.pig.data.DataBag)7 SourceType (ml.shifu.shifu.container.obj.RawSourceData.SourceType)5 BasicFloatNetwork (ml.shifu.shifu.core.dtrain.dataset.BasicFloatNetwork)5 TrainingDataSet (ml.shifu.shifu.core.dvarsel.dataset.TrainingDataSet)5 BasicMLData (org.encog.ml.data.basic.BasicMLData)5 BufferedWriter (java.io.BufferedWriter)3 FileInputStream (java.io.FileInputStream)3