Search in sources :

Example 76 with ColumnConfig

use of ml.shifu.shifu.container.obj.ColumnConfig in project shifu by ShifuML.

Method hasCandidateTest of the class CommonUtilsTest.

// @Test
// public void assembleDataPairTest() throws Exception {
// Map<String, String> rawDataMap = new HashMap<String, String>();
// rawDataMap.put("ColumnA", "TestValue");
// 
// ColumnConfig config = new ColumnConfig();
// config.setColumnName("ColumnA");
// List<ColumnConfig> columnConfigList = new ArrayList<ColumnConfig>();
// columnConfigList.add(config);
// 
// MLDataPair dp = CommonUtils.assembleDataPair(columnConfigList,
// rawDataMap);
// Assert.assertTrue(dp.getInput().getData().length == 0);
// 
// Map<String, Object> objDataMap = new HashMap<String, Object>();
// objDataMap.put("ColumnA", 10);
// config.setFinalSelect(true);
// config.setMean(12.0);
// config.setStdDev(4.6);
// MLDataPair pair = CommonUtils.assembleDataPair(columnConfigList,
// objDataMap);
// Assert.assertTrue(pair.getInput().getData()[0] < 0.0);
// }
/**
 * Checks that {@code CommonUtils.hasCandidateColumns} reports {@code true} for a list holding a
 * single column that is flagged {@code ColumnFlag.Candidate} and is not final-selected.
 */
@Test
public void hasCandidateTest() {
    ColumnConfig candidate = new ColumnConfig();
    candidate.setColumnName("A");
    candidate.setFinalSelect(false);
    candidate.setColumnFlag(ColumnFlag.Candidate);

    List<ColumnConfig> columns = new ArrayList<ColumnConfig>();
    columns.add(candidate);

    boolean hasCandidates = CommonUtils.hasCandidateColumns(columns);
    Assert.assertTrue(hasCandidates);
}
Also used : ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) ArrayList(java.util.ArrayList) Test(org.testng.annotations.Test)

Example 77 with ColumnConfig

use of ml.shifu.shifu.container.obj.ColumnConfig in project shifu by ShifuML.

Method getFinalSelectColumnConfigListTest of the class CommonUtilsTest.

/**
 * Feeds three columns ("A", "B", "C"), of which only "B" is marked final-select, to
 * {@code CommonUtils.getFinalSelectColumnConfigList} and expects the returned collection to
 * contain exactly one element.
 */
@Test
public void getFinalSelectColumnConfigListTest() {
    Collection<ColumnConfig> configList = new ArrayList<ColumnConfig>();

    ColumnConfig config = new ColumnConfig();
    config.setColumnName("A");
    config.setFinalSelect(false);
    configList.add(config);

    config = new ColumnConfig();
    config.setFinalSelect(true);
    config.setColumnName("B");
    configList.add(config);

    config = new ColumnConfig();
    config.setFinalSelect(false);
    config.setColumnName("C");
    configList.add(config);

    configList = CommonUtils.getFinalSelectColumnConfigList(configList);

    // assertEquals reports the actual size on failure, which assertTrue(size == 1) cannot do.
    Assert.assertEquals(configList.size(), 1);
}
Also used : ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) ArrayList(java.util.ArrayList) Test(org.testng.annotations.Test)

Example 78 with ColumnConfig

use of ml.shifu.shifu.container.obj.ColumnConfig in project shifu by ShifuML.

Method build of the class TreeModelMiningSchemaCreator.

/**
 * Builds a {@code MiningSchema} with one {@code MiningField} for every column in
 * {@code columnConfigList} that is final-selected or is the target.
 *
 * <p>Numerical columns get the string form of their column-stats mean as missing-value
 * replacement; all other columns get the empty string. Final-selected columns are marked
 * {@code UsageType.ACTIVE}; columns that are only targets are marked {@code UsageType.TARGET}.
 *
 * @param basicML the model; not read by this method
 * @return the populated mining schema
 */
@Override
public MiningSchema build(BasicML basicML) {
    MiningSchema schema = new MiningSchema();
    for (ColumnConfig column : columnConfigList) {
        // Only final-selected or target columns appear in the schema.
        if (!column.isFinalSelect() && !column.isTarget()) {
            continue;
        }

        MiningField field = new MiningField();
        // TODO, how to support segment variable in tree model, here should be changed
        String simpleName = NormalUtils.getSimpleColumnName(column.getColumnName());
        field.setName(FieldName.create(simpleName));
        field.setOpType(getOptype(column));

        String missingReplacement = column.isNumerical()
                ? String.valueOf(column.getColumnStats().getMean())
                : "";
        field.setMissingValueReplacement(missingReplacement);

        // Final-select takes precedence over target when a column is both.
        if (column.isFinalSelect()) {
            field.setUsageType(UsageType.ACTIVE);
        } else if (column.isTarget()) {
            field.setUsageType(UsageType.TARGET);
        }

        schema.addMiningFields(field);
    }
    return schema;
}
Also used : MiningField(org.dmg.pmml.MiningField) MiningSchema(org.dmg.pmml.MiningSchema) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig)

Example 79 with ColumnConfig

use of ml.shifu.shifu.container.obj.ColumnConfig in project shifu by ShifuML.

Method initFeatureStats of the class PostTrainMapper.

/**
 * Resets {@code variableStatsMap} and fills it with a list of zero-initialised {@code BinStats}
 * for every final-selected column that is neither meta nor target, keyed by column number.
 *
 * <p>The list length is the number of bin boundaries plus one for numerical columns and the
 * number of bin categories for categorical columns; a column that is neither gets an empty list.
 */
private void initFeatureStats() {
    this.variableStatsMap = new HashMap<Integer, List<BinStats>>();
    for (ColumnConfig column : this.columnConfigList) {
        if (column.isMeta() || column.isTarget() || !column.isFinalSelect()) {
            continue;
        }

        int binCount = 0;
        if (column.isNumerical()) {
            binCount = column.getBinBoundary().size() + 1;
        }
        // Checked independently of the numerical case: the categorical size wins if both apply.
        if (column.isCategorical()) {
            binCount = column.getBinCategory().size();
        }

        List<BinStats> zeroedStats = new ArrayList<BinStats>(binCount);
        while (zeroedStats.size() < binCount) {
            zeroedStats.add(new BinStats(0, 0));
        }
        this.variableStatsMap.put(column.getColumnNum(), zeroedStats);
    }
}
Also used : BinStats(ml.shifu.shifu.core.posttrain.FeatureStatsWritable.BinStats) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) ArrayList(java.util.ArrayList) List(java.util.List)

Example 80 with ColumnConfig

use of ml.shifu.shifu.container.obj.ColumnConfig in project shifu by ShifuML.

Method build of the class ZscoreLocalTransformCreator.

/**
 * Builds the {@code LocalTransformations} holding the derived fields of every final-selected
 * column in {@code columnConfigList}.
 *
 * <p>When {@code basicML} is a {@code BasicFloatNetwork} with a non-empty feature set, only
 * columns whose column number is in that set are included. For any other model, or when the
 * feature set is null or empty, every final-selected column is included.
 *
 * @param basicML the model whose feature set (if any) restricts the included columns
 * @return the local transformations with the derived fields added
 */
@Override
public LocalTransformations build(BasicML basicML) {
    LocalTransformations localTransformations = new LocalTransformations();

    // Only a BasicFloatNetwork carries a feature set; other models impose no restriction.
    Set<Integer> featureSet = null;
    if (basicML instanceof BasicFloatNetwork) {
        featureSet = ((BasicFloatNetwork) basicML).getFeatureSet();
    }
    // A null or empty feature set means "use all final-selected columns".
    boolean restrictToFeatureSet = featureSet != null && !featureSet.isEmpty();

    for (ColumnConfig config : columnConfigList) {
        if (!config.isFinalSelect()) {
            continue;
        }
        if (restrictToFeatureSet && !featureSet.contains(config.getColumnNum())) {
            continue;
        }
        double cutoff = modelConfig.getNormalizeStdDevCutOff();
        List<DerivedField> derivedFields = config.isCategorical()
                ? createCategoricalDerivedField(config, cutoff, modelConfig.getNormalizeType())
                : createNumericalDerivedField(config, cutoff, modelConfig.getNormalizeType());
        localTransformations.addDerivedFields(derivedFields.toArray(new DerivedField[derivedFields.size()]));
    }
    return localTransformations;
}
Also used : LocalTransformations(org.dmg.pmml.LocalTransformations) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) BasicFloatNetwork(ml.shifu.shifu.core.dtrain.dataset.BasicFloatNetwork) DerivedField(org.dmg.pmml.DerivedField)

Aggregations

ColumnConfig (ml.shifu.shifu.container.obj.ColumnConfig): 131, ArrayList (java.util.ArrayList): 36, Test (org.testng.annotations.Test): 17, IOException (java.io.IOException): 16, HashMap (java.util.HashMap): 12, Tuple (org.apache.pig.data.Tuple): 10, File (java.io.File): 8, NSColumn (ml.shifu.shifu.column.NSColumn): 8, ModelConfig (ml.shifu.shifu.container.obj.ModelConfig): 8, ShifuException (ml.shifu.shifu.exception.ShifuException): 8, Path (org.apache.hadoop.fs.Path): 8, List (java.util.List): 7, Scanner (java.util.Scanner): 7, DataBag (org.apache.pig.data.DataBag): 7, SourceType (ml.shifu.shifu.container.obj.RawSourceData.SourceType): 5, BasicFloatNetwork (ml.shifu.shifu.core.dtrain.dataset.BasicFloatNetwork): 5, TrainingDataSet (ml.shifu.shifu.core.dvarsel.dataset.TrainingDataSet): 5, BasicMLData (org.encog.ml.data.basic.BasicMLData): 5, BufferedWriter (java.io.BufferedWriter): 3, FileInputStream (java.io.FileInputStream): 3