Use of weka.filters.unsupervised.attribute.ReplaceMissingValues in project umple by umple.
In class MultiFilterTest, method getConfiguredFilterVariant.
/**
 * Builds the variant MultiFilter configuration used by this test: a chain of
 * two filters, a ReplaceMissingValues followed by a Center.
 *
 * @return the configured MultiFilter
 */
public Filter getConfiguredFilterVariant() {
  MultiFilter multi = new MultiFilter();
  // Order matters: element 0 is applied before element 1.
  multi.setFilters(new Filter[] { new ReplaceMissingValues(), new Center() });
  return multi;
}
Use of weka.filters.unsupervised.attribute.ReplaceMissingValues in project umple by umple.
In class SGD, method buildClassifier.
/**
 * Trains this classifier on the supplied instances.
 * <p>
 * The input is copied and rows with a missing class value are removed. The
 * copy is then passed, in this order, through missing-value replacement
 * (unless disabled via m_dontReplaceMissing), nominal-to-binary conversion
 * (only if a non-class attribute is not numeric) and normalization (unless
 * disabled via m_dontNormalize). Training only happens if instances remain.
 *
 * @param data the set of training instances.
 * @throws Exception if the classifier can't be built successfully.
 */
@Override
public void buildClassifier(Instances data) throws Exception {
  reset();

  // Fail early if this classifier cannot handle the data.
  getCapabilities().testWithFail(data);

  // Work on a copy so the caller's dataset is not modified.
  data = new Instances(data);
  data.deleteWithMissingClass();

  if (data.numInstances() > 0 && !m_dontReplaceMissing) {
    m_replaceMissing = new ReplaceMissingValues();
    m_replaceMissing.setInputFormat(data);
    data = Filter.useFilter(data, m_replaceMissing);
  }

  // Scan the non-class attributes; stop at the first one that is not numeric.
  boolean onlyNumeric = true;
  int att = 0;
  while (onlyNumeric && att < data.numAttributes()) {
    if (att != data.classIndex() && !data.attribute(att).isNumeric()) {
      onlyNumeric = false;
    }
    att++;
  }

  if (!onlyNumeric) {
    // The supervised variant is used when there is data, the unsupervised
    // variant when the dataset is empty.
    if (data.numInstances() > 0) {
      m_nominalToBinary = new weka.filters.supervised.attribute.NominalToBinary();
    } else {
      m_nominalToBinary = new weka.filters.unsupervised.attribute.NominalToBinary();
    }
    m_nominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_nominalToBinary);
  }

  if (!m_dontNormalize && data.numInstances() > 0) {
    m_normalize = new Normalize();
    m_normalize.setInputFormat(data);
    data = Filter.useFilter(data, m_normalize);
  }

  m_numInstances = data.numInstances();
  // One weight per attribute plus one extra slot.
  m_weights = new double[data.numAttributes() + 1];
  // Copy created with capacity 0, i.e. without the training rows.
  m_data = new Instances(data, 0);

  if (data.numInstances() <= 0) {
    return;
  }

  // Shuffle the training data using the configured seed, then train.
  data.randomize(new Random(getSeed()));
  train(data);
}
Use of weka.filters.unsupervised.attribute.ReplaceMissingValues in project umple by umple.
In class SMO, method buildClassifier.
/**
 * Trains the classifier. Multi-class problems are handled with a
 * one-against-one wrapper: one binary model is built for every pair of
 * class values.
 * <p>
 * Unless checks are turned off, the data is validated, copied, stripped of
 * instances with a missing class or a non-positive weight, and run through
 * missing-value replacement. With checks turned off the data is used as
 * passed in and neither missing-value replacement nor nominal-to-binary
 * conversion is set up.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {
  if (!m_checksTurnedOff) {
    // Fail early if this classifier cannot handle the data.
    getCapabilities().testWithFail(insts);

    // Copy, then drop instances whose class value is missing.
    insts = new Instances(insts);
    insts.deleteWithMissingClass();

    /* Keep only instances with a weight greater than 0.
       MUST be done since condition (8) of Keerthi's paper
       is made with the assertion Ci > 0 (see equation (3a)). */
    Instances positiveWeight = new Instances(insts, insts.numInstances());
    for (int i = 0; i < insts.numInstances(); i++) {
      Instance candidate = insts.instance(i);
      if (candidate.weight() > 0) {
        positiveWeight.add(candidate);
      }
    }
    if (positiveWeight.numInstances() == 0) {
      throw new Exception("No training instances left after removing " + "instances with weight 0!");
    }
    insts = positiveWeight;

    m_Missing = new ReplaceMissingValues();
    m_Missing.setInputFormat(insts);
    insts = Filter.useFilter(insts, m_Missing);
  } else {
    m_Missing = null;
  }

  // Nominal-to-binary conversion is needed only if numeric attributes are
  // supported, checks are on, and some non-class attribute is not numeric.
  boolean needsBinarization = false;
  if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES) && !m_checksTurnedOff) {
    for (int att = 0; att < insts.numAttributes(); att++) {
      if (att != insts.classIndex() && !insts.attribute(att).isNumeric()) {
        needsBinarization = true;
        break;
      }
    }
  }
  if (needsBinarization) {
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(insts);
    insts = Filter.useFilter(insts, m_NominalToBinary);
  } else {
    m_NominalToBinary = null;
  }

  // Select the scaling filter (if any), then apply it in one place.
  if (m_filterType == FILTER_STANDARDIZE) {
    m_Filter = new Standardize();
  } else if (m_filterType == FILTER_NORMALIZE) {
    m_Filter = new Normalize();
  } else {
    m_Filter = null;
  }
  if (m_Filter != null) {
    m_Filter.setInputFormat(insts);
    insts = Filter.useFilter(insts, m_Filter);
  }

  m_classIndex = insts.classIndex();
  m_classAttribute = insts.classAttribute();
  // The kernel counts as linear only for a PolyKernel with exponent 1.0.
  m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

  // Split the training data into one subset per class value.
  final int numClasses = insts.numClasses();
  Instances[] perClass = new Instances[numClasses];
  for (int c = 0; c < numClasses; c++) {
    perClass[c] = new Instances(insts, insts.numInstances());
  }
  for (int row = 0; row < insts.numInstances(); row++) {
    Instance inst = insts.instance(row);
    perClass[(int) inst.classValue()].add(inst);
  }
  for (Instances subset : perClass) {
    subset.compactify();
  }

  // Build one binary classifier per unordered pair of classes; only the
  // cells with first < second of m_classifiers are filled.
  Random rand = new Random(m_randomSeed);
  m_classifiers = new BinarySMO[numClasses][numClasses];
  for (int first = 0; first < numClasses; first++) {
    for (int second = first + 1; second < numClasses; second++) {
      BinarySMO pairModel = new BinarySMO();
      m_classifiers[first][second] = pairModel;
      pairModel.setKernel(Kernel.makeCopy(getKernel()));

      // Training set for this pair: all of class `first`, then all of
      // class `second`, shuffled with the shared random generator.
      Instances pairData = new Instances(insts, insts.numInstances());
      for (int k = 0; k < perClass[first].numInstances(); k++) {
        pairData.add(perClass[first].instance(k));
      }
      for (int k = 0; k < perClass[second].numInstances(); k++) {
        pairData.add(perClass[second].instance(k));
      }
      pairData.compactify();
      pairData.randomize(rand);
      pairModel.buildClassifier(pairData, first, second, m_fitLogisticModels, m_numFolds, m_randomSeed);
    }
  }
}
Use of weka.filters.unsupervised.attribute.ReplaceMissingValues in project umple by umple.
In class SMOreg, method buildClassifier.
/**
 * Trains the regression model.
 * <p>
 * The data is validated, copied, stripped of instances with a missing class
 * or a non-positive weight, and passed through missing-value replacement,
 * optional nominal-to-binary conversion and the configured
 * standardize/normalize filter. Because that filter is configured with
 * setIgnoreClass(true), the class values read back after filtering may
 * differ from the originals; m_x0 and m_x1 record the line through two
 * (filtered, original) class-value pairs, i.e. original = m_x0 + m_x1 * filtered.
 *
 * @param instances the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  // Fail early if this classifier cannot handle the data.
  getCapabilities().testWithFail(instances);

  // Copy, then drop instances whose class value is missing.
  instances = new Instances(instances);
  instances.deleteWithMissingClass();

  // Keep only instances with a weight greater than 0.
  // MUST be done since condition (8) of Keerthi's paper
  // is made with the assertion Ci > 0 (see equation (3a)).
  Instances positiveWeight = new Instances(instances, 0);
  for (int row = 0; row < instances.numInstances(); row++) {
    if (instances.instance(row).weight() > 0) {
      positiveWeight.add(instances.instance(row));
    }
  }
  if (positiveWeight.numInstances() == 0) {
    throw new Exception("No training instances left after removing " + "instance with either a weight null or a missing class!");
  }
  instances = positiveWeight;

  // Record whether every non-class attribute is numeric.
  m_onlyNumeric = true;
  for (int att = 0; att < instances.numAttributes(); att++) {
    if (att == instances.classIndex()) {
      continue;
    }
    if (!instances.attribute(att).isNumeric()) {
      m_onlyNumeric = false;
      break;
    }
  }

  m_Missing = new ReplaceMissingValues();
  m_Missing.setInputFormat(instances);
  instances = Filter.useFilter(instances, m_Missing);

  // Binarize nominal attributes only when numeric attributes are supported
  // and at least one non-class attribute is not numeric.
  if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES) && !m_onlyNumeric) {
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_NominalToBinary);
  } else {
    m_NominalToBinary = null;
  }

  // Find two instances with different class values; they are used below to
  // determine the transformation the filter applies to the class.
  final double y0 = instances.instance(0).classValue();
  int index = 1;
  while (index < instances.numInstances()) {
    if (instances.instance(index).classValue() != y0) {
      break;
    }
    index++;
  }
  if (index == instances.numInstances()) {
    // we don't want to deal with this, too much hassle
    throw new Exception("All class values are the same. At least two class values should be different");
  }
  final double y1 = instances.instance(index).classValue();

  // Select the scaling filter (if any).
  if (m_filterType == FILTER_STANDARDIZE) {
    m_Filter = new Standardize();
    ((Standardize) m_Filter).setIgnoreClass(true);
  } else if (m_filterType == FILTER_NORMALIZE) {
    m_Filter = new Normalize();
    ((Normalize) m_Filter).setIgnoreClass(true);
  } else {
    m_Filter = null;
  }

  if (m_Filter == null) {
    // No filter: class values are unchanged, so the mapping is the identity.
    m_x1 = 1.0;
    m_x0 = 0.0;
  } else {
    m_Filter.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_Filter);

    // Class values of the same two instances after filtering.
    double z0 = instances.instance(0).classValue();
    double z1 = instances.instance(index).classValue();
    // NOTE(review): the division relies on y0 != y1 implying z0 != z1;
    // the original author flagged this as unverified ("???").
    m_x1 = (y0 - y1) / (z0 - z1);
    // equivalently y1 - m_x1 * z1
    m_x0 = (y0 - m_x1 * z0);
  }

  m_optimizer.setSMOReg(this);
  m_optimizer.buildClassifier(instances);
}
Use of weka.filters.unsupervised.attribute.ReplaceMissingValues in project umple by umple.
In class SimpleLogistic, method buildClassifier.
/**
 * Builds the logistic regression model using LogitBoost.
 * <p>
 * The training data is validated, copied, stripped of instances with a
 * missing class, and passed through missing-value replacement and
 * nominal-to-binary conversion before the boosted model is trained on it.
 *
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
  // Fail early if this classifier cannot handle the data.
  getCapabilities().testWithFail(data);

  // Work on a copy without the instances whose class value is missing.
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  // Replace missing values; the filter is kept in a field.
  m_ReplaceMissingValues = new ReplaceMissingValues();
  m_ReplaceMissingValues.setInputFormat(train);
  train = Filter.useFilter(train, m_ReplaceMissingValues);

  // Convert nominal attributes to binary ones; this filter is kept as well.
  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(train);
  train = Filter.useFilter(train, m_NominalToBinary);

  // Create the boosted logistic model and configure it from this object's options.
  LogisticBase model = new LogisticBase(m_numBoostingIterations, m_useCrossValidation, m_errorOnProbabilities);
  m_boostedModel = model;
  model.setMaxIterations(m_maxBoostingIterations);
  model.setHeuristicStop(m_heuristicStop);
  model.setWeightTrimBeta(m_weightTrimBeta);
  model.setUseAIC(m_useAIC);

  // Train on the preprocessed data.
  model.buildClassifier(train);
}
Aggregations