Use of weka.filters.unsupervised.attribute.NominalToBinary in project umple by umple.
Class SMO, method buildClassifier.
/**
 * Builds the classifier. Multi-class problems are handled by a
 * one-against-one wrapper around binary SMO models.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {
    if (!m_checksTurnedOff) {
        // Verify that this classifier can handle the given data.
        getCapabilities().testWithFail(insts);
        // Work on a copy so the caller's data set stays untouched,
        // and drop instances whose class value is missing.
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        /* Drop all zero-weight instances. MUST be done since condition (8)
           of Keerthi's paper assumes Ci > 0 (see equation (3a)). */
        Instances cleaned = new Instances(insts, insts.numInstances());
        for (int n = 0; n < insts.numInstances(); n++) {
            if (insts.instance(n).weight() > 0) {
                cleaned.add(insts.instance(n));
            }
        }
        if (cleaned.numInstances() == 0) {
            throw new Exception("No training instances left after removing "
                + "instances with weight 0!");
        }
        insts = cleaned;
    }
    // Impute missing attribute values (skipped when checks are off).
    if (m_checksTurnedOff) {
        m_Missing = null;
    } else {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    }
    // Binarize nominal attributes when the data contains any and the
    // classifier supports numeric attributes.
    m_NominalToBinary = null;
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean allNumeric = true;
        if (!m_checksTurnedOff) {
            for (int a = 0; a < insts.numAttributes() && allNumeric; a++) {
                if (a != insts.classIndex() && !insts.attribute(a).isNumeric()) {
                    allNumeric = false;
                }
            }
        }
        if (!allNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        }
    }
    // Apply the configured attribute transformation, if any.
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
    } else {
        m_Filter = null;
    }
    if (m_Filter != null) {
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    }
    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    // A polynomial kernel with exponent 1 is effectively linear.
    m_KernelIsLinear = (m_kernel instanceof PolyKernel)
        && (((PolyKernel) m_kernel).getExponent() == 1.0);
    // Split the training data into one subset per class value.
    int numClasses = insts.numClasses();
    Instances[] classSubsets = new Instances[numClasses];
    for (int c = 0; c < numClasses; c++) {
        classSubsets[c] = new Instances(insts, insts.numInstances());
    }
    for (int n = 0; n < insts.numInstances(); n++) {
        Instance current = insts.instance(n);
        classSubsets[(int) current.classValue()].add(current);
    }
    for (int c = 0; c < numClasses; c++) {
        classSubsets[c].compactify();
    }
    // Train one binary SMO model per unordered pair of classes.
    Random rng = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[numClasses][numClasses];
    for (int first = 0; first < numClasses; first++) {
        for (int second = first + 1; second < numClasses; second++) {
            m_classifiers[first][second] = new BinarySMO();
            m_classifiers[first][second].setKernel(Kernel.makeCopy(getKernel()));
            // Merge the two class subsets, then shuffle before training.
            Instances pairData = new Instances(insts, insts.numInstances());
            for (int k = 0; k < classSubsets[first].numInstances(); k++) {
                pairData.add(classSubsets[first].instance(k));
            }
            for (int k = 0; k < classSubsets[second].numInstances(); k++) {
                pairData.add(classSubsets[second].instance(k));
            }
            pairData.compactify();
            pairData.randomize(rng);
            m_classifiers[first][second].buildClassifier(pairData, first, second,
                m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
Use of weka.filters.unsupervised.attribute.NominalToBinary in project umple by umple.
Class SMOreg, method buildClassifier.
/**
 * Builds the regression model from the given training instances.
 *
 * @param instances the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
    // Verify that this classifier can handle the given data.
    getCapabilities().testWithFail(instances);
    // Work on a copy and drop instances whose class value is missing.
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    // Drop all zero-weight instances. MUST be done since condition (8)
    // of Keerthi's paper assumes Ci > 0 (see equation (3a)).
    Instances cleaned = new Instances(instances, 0);
    for (int n = 0; n < instances.numInstances(); n++) {
        if (instances.instance(n).weight() > 0) {
            cleaned.add(instances.instance(n));
        }
    }
    if (cleaned.numInstances() == 0) {
        throw new Exception("No training instances left after removing "
            + "instance with either a weight null or a missing class!");
    }
    instances = cleaned;
    // Record whether every non-class attribute is numeric.
    m_onlyNumeric = true;
    for (int a = 0; a < instances.numAttributes() && m_onlyNumeric; a++) {
        if (a != instances.classIndex() && !instances.attribute(a).isNumeric()) {
            m_onlyNumeric = false;
        }
    }
    // Impute missing attribute values.
    m_Missing = new ReplaceMissingValues();
    m_Missing.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_Missing);
    // Binarize nominal attributes when present and supported.
    m_NominalToBinary = null;
    if (!m_onlyNumeric && getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_NominalToBinary);
    }
    // Find two distinct class values; they pin down the (linear)
    // transformation the filter applies to the class attribute.
    double rawFirst = instances.instance(0).classValue();
    int probe = 1;
    while (probe < instances.numInstances()
        && instances.instance(probe).classValue() == rawFirst) {
        probe++;
    }
    if (probe == instances.numInstances()) {
        // A constant class value makes the inverse mapping ill-defined.
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double rawOther = instances.instance(probe).classValue();
    // Apply the configured attribute transformation, if any (the class
    // attribute is transformed too: setIgnoreClass(true)).
    if (m_filterType == FILTER_STANDARDIZE) {
        Standardize standardize = new Standardize();
        standardize.setIgnoreClass(true);
        m_Filter = standardize;
    } else if (m_filterType == FILTER_NORMALIZE) {
        Normalize normalize = new Normalize();
        normalize.setIgnoreClass(true);
        m_Filter = normalize;
    } else {
        m_Filter = null;
    }
    if (m_Filter != null) {
        m_Filter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_Filter);
        // Solve for the linear map from filtered back to raw class values.
        double z0 = instances.instance(0).classValue();
        double z1 = instances.instance(probe).classValue();
        // rawFirst != rawOther guarantees z0 != z1, so no division by zero.
        m_x1 = (rawFirst - rawOther) / (z0 - z1);
        // m_x0 = rawOther - m_x1 * z1 as well
        m_x0 = (rawFirst - m_x1 * z0);
    } else {
        // No filtering: the identity map.
        m_x1 = 1.0;
        m_x0 = 0.0;
    }
    // Delegate the actual optimization to the configured optimizer.
    m_optimizer.setSMOReg(this);
    m_optimizer.buildClassifier(instances);
}
Use of weka.filters.unsupervised.attribute.NominalToBinary in project umple by umple.
Class SimpleLogistic, method buildClassifier.
/**
 * Builds the logistic regression model via LogitBoost.
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
    // Verify that this classifier can handle the given data.
    getCapabilities().testWithFail(data);
    // Work on a copy and drop instances whose class value is missing.
    data = new Instances(data);
    data.deleteWithMissingClass();
    // Impute missing attribute values.
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);
    // Binarize nominal attributes.
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);
    // Configure the underlying boosted logistic model.
    m_boostedModel = new LogisticBase(m_numBoostingIterations,
        m_useCrossValidation, m_errorOnProbabilities);
    m_boostedModel.setUseAIC(m_useAIC);
    m_boostedModel.setWeightTrimBeta(m_weightTrimBeta);
    m_boostedModel.setHeuristicStop(m_heuristicStop);
    m_boostedModel.setMaxIterations(m_maxBoostingIterations);
    // Fit it on the filtered data.
    m_boostedModel.buildClassifier(data);
}
Use of weka.filters.unsupervised.attribute.NominalToBinary in project umple by umple.
Class VotedPerceptron, method buildClassifier.
/**
 * Builds the ensemble of perceptrons.
 *
 * @param insts the data to train the classifier with
 * @throws Exception if something goes wrong during building
 */
public void buildClassifier(Instances insts) throws Exception {
    // Verify that this classifier can handle the given data.
    getCapabilities().testWithFail(insts);
    // Work on a copy and drop instances whose class value is missing.
    insts = new Instances(insts);
    insts.deleteWithMissingClass();
    // Filter the training data: impute missing values, then binarize
    // nominal attributes.
    m_Train = new Instances(insts);
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_NominalToBinary);
    // Shuffle the training data with the configured seed.
    m_Train.randomize(new Random(m_Seed));
    // Allocate storage for up to m_MaxK + 1 perceptrons.
    m_Additions = new int[m_MaxK + 1];
    m_IsAddition = new boolean[m_MaxK + 1];
    m_Weights = new int[m_MaxK + 1];
    // Compute the perceptrons: sweep the data m_NumIterations times, or
    // until the perceptron capacity is reached.
    m_K = 0;
    boolean capacityReached = false;
    for (int it = 0; it < m_NumIterations && !capacityReached; it++) {
        for (int i = 0; i < m_Train.numInstances() && !capacityReached; i++) {
            Instance inst = m_Train.instance(i);
            if (inst.classIsMissing()) {
                continue;
            }
            int predicted = makePrediction(m_K, inst);
            int actual = (int) inst.classValue();
            if (predicted == actual) {
                // Correct prediction: reward the current perceptron.
                m_Weights[m_K]++;
            } else {
                // Mistake: record the update and start a new perceptron.
                m_IsAddition[m_K] = (actual == 1);
                m_Additions[m_K] = i;
                m_K++;
                m_Weights[m_K]++;
            }
            if (m_K == m_MaxK) {
                capacityReached = true;
            }
        }
    }
}
Use of weka.filters.unsupervised.attribute.NominalToBinary in project umple by umple.
Class Logistic, method buildClassifier.
/**
 * Builds the multinomial logistic regression model.
 *
 * <p>The data is filtered (missing-value replacement, useless-attribute
 * removal, nominal-to-binary conversion), copied into a standardized
 * design matrix, and the coefficients are fitted by numeric optimization
 * of the log-likelihood before being converted back to the original
 * attribute units.</p>
 *
 * @param train the training data to be used for generating the boosted
 * classifier.
 * @throws Exception if the classifier could not be built successfully
 */
@Override
public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);
    // remove instances with missing class (work on a copy)
    train = new Instances(train);
    train.deleteWithMissingClass();
    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);
    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);
    // Transform nominal attributes to binary ones
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);
    // Save the structure for printing the model
    m_structure = new Instances(train, 0);
    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();
    // Only K-1 class labels needed (the last class is the reference class)
    int nK = m_NumClasses - 1;
    // nR = number of predictors (all attributes except the class)
    int nR = m_NumPredictors = train.numAttributes() - 1;
    int nC = train.numInstances();
    // Data values: one row per instance; column 0 is the intercept term
    m_Data = new double[nC][nR + 1];
    // Class labels
    int[] Y = new int[nC];
    // Attribute means (weighted) -- used to standardize the data
    double[] xMean = new double[nR + 1];
    // Attribute stddev's (weighted)
    double[] xSD = new double[nR + 1];
    // Per-class instance counts
    double[] sY = new double[nK + 1];
    // Weights of instances
    double[] weights = new double[nC];
    // Total weights of the instances
    double totWeights = 0;
    // Optimized parameter values
    m_Par = new double[nR + 1][nK];
    if (m_Debug) {
        System.out.println("Extracting data...");
    }
    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        // Class value starts from 0
        Y[i] = (int) current.classValue();
        // Dealing with weights
        weights[i] = current.weight();
        totWeights += weights[i];
        // Constant 1 for the intercept column
        m_Data[i][0] = 1;
        int j = 1;
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                // Accumulate weighted first and second moments for mean/SD
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }
        // Class count
        sY[Y[i]]++;
    }
    if ((totWeights <= 1) && (nC > 1)) {
        throw new Exception("Sum of weights of instances less than 1, please reweight!");
    }
    // The intercept column is never standardized
    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        if (totWeights > 1) {
            // Weighted sample standard deviation
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        } else {
            xSD[j] = 0;
        }
    }
    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++) {
            System.out.println(sY[m] + " cases have class " + m);
        }
        System.out.println("\n Variable Avg SD ");
        for (int j = 1; j <= nR; j++) {
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4) + Utils.doubleToString(xSD[j], 10, 4));
        }
    }
    // Normalise input data (skip zero-variance columns)
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }
    if (m_Debug) {
        System.out.println("\nIteration History...");
    }
    // Flattened parameter vector: nK blocks of (nR + 1) coefficients each
    double[] x = new double[(nR + 1) * nK];
    // Boundary constraints, N/A here
    double[][] b = new double[2][x.length];
    // Initialize
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        // Null model: intercept from class priors (log-odds vs. reference
        // class, with add-one smoothing); all slope coefficients start at 0
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0);
        // NaN bounds mean the variable is unconstrained
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }
    // Set up the objective and the chosen optimization engine
    OptObject oO = new OptObject();
    oO.setWeights(weights);
    oO.setClassLabels(Y);
    Optimization opt = null;
    if (m_useConjugateGradientDescent) {
        opt = new OptEngCG(oO);
    } else {
        opt = new OptEng(oO);
    }
    opt.setDebug(m_Debug);
    if (m_MaxIts == -1) {
        // Search until convergence: findArgmin returns null when its
        // internal iteration cap is hit, so resume from the current
        // variable values until it converges
        x = opt.findArgmin(x, b);
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug) {
                System.out.println("First set of iterations finished, not enough!");
            }
            x = opt.findArgmin(x, b);
        }
        if (m_Debug) {
            System.out.println(" -------------<Converged>--------------");
        }
    } else {
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) {
            // Hit the iteration limit; accept the best values found so far
            x = opt.getVarbValues();
        }
    }
    // Log-likelihood
    m_LL = -opt.getMinFunction();
    // Don't need data matrix anymore
    m_Data = null;
    // Convert coefficients back to non-normalized attribute units
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            if (xSD[j] != 0) {
                // Undo the standardization applied above and fold the
                // mean shift into the intercept
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}
Aggregations