use of cosmos.distribution.v1beta1.Distribution in project umple by umple.
the class C45PruneableDecList method buildDecList.
/**
 * Builds the partial tree without hold out set.
 *
 * <p>The node state is reset first. The local model is a {@code NoSplit}
 * over {@code data} when {@code leaf} is set, otherwise whatever
 * {@code m_toSelectModel} selects. A model with at most one subset turns
 * this node into a leaf. Otherwise successors are built one at a time for
 * the subset returned by {@code chooseIndex()}, for as long as the successor
 * just built is a leaf and fewer than all subsets have been visited. If
 * every successor is a leaf afterwards, {@code pruneEnd()} is called and
 * {@code indeX} is only updated when {@code m_isLeaf} is still false.
 *
 * @param data the instances this node is built from
 * @param leaf if true, the no-split model is used without selecting a model
 * @throws Exception if something goes wrong
 */
@Override
public void buildDecList(Instances data, boolean leaf) throws Exception {
  // Start from a clean node.
  m_train = null;
  m_test = null;
  m_isLeaf = false;
  m_isEmpty = false;
  m_sons = null;
  indeX = 0;

  final double totalWeight = data.sumOfWeights();
  final NoSplit leafModel = new NoSplit(new Distribution(data));
  m_localModel = leaf ? leafModel : m_toSelectModel.selectModel(data);

  // Nothing to split on: this node is a leaf.
  if (m_localModel.numSubsets() <= 1) {
    m_isLeaf = true;
    if (Utils.eq(totalWeight, 0)) {
      m_isEmpty = true;
    }
    return;
  }

  final Instances[] subsets = m_localModel.split(data);
  data = null; // drop our reference to the unsplit data
  m_sons = new ClassifierDecList[m_localModel.numSubsets()];

  // round counts how many times chooseIndex() has been consulted.
  for (int round = 1;; round++) {
    final int pick = chooseIndex();
    if (pick == -1) {
      // No index was chosen: create every still-missing successor, passing
      // true as the last argument (presumably the leaf flag).
      for (int k = 0; k < m_sons.length; k++) {
        if (m_sons[k] == null) {
          m_sons[k] = getNewDecList(subsets[k], true);
        }
      }
      if (round < 2) {
        // This happened on the very first round: collapse into a leaf.
        m_localModel = leafModel;
        m_isLeaf = true;
        m_sons = null;
        if (Utils.eq(totalWeight, 0)) {
          m_isEmpty = true;
        }
        return;
      }
      break;
    }
    m_sons[pick] = getNewDecList(subsets[pick], false);
    // Stop once all subsets were visited or the new successor is not a leaf.
    if (round >= m_sons.length || !m_sons[pick].m_isLeaf) {
      break;
    }
  }

  // Check if all successors are leaves.
  boolean allLeaves = true;
  for (int k = 0; k < m_sons.length && allLeaves; k++) {
    allLeaves = (m_sons[k] != null) && m_sons[k].m_isLeaf;
  }
  if (allLeaves) {
    pruneEnd();
  }
  if (!allLeaves || !m_isLeaf) {
    indeX = chooseLastIndex();
  }
}
use of cosmos.distribution.v1beta1.Distribution in project umple by umple.
the class PruneableDecList method buildDecList.
/**
 * Builds the partial tree with hold out set.
 *
 * <p>The node state is reset first. The local model is a {@code NoSplit}
 * over {@code train} when {@code leaf} is set, otherwise whatever
 * {@code m_toSelectModel} selects from {@code train} and {@code test}.
 * {@code m_test} is then set to the distribution of {@code test} under that
 * model. A model with at most one subset turns this node into a leaf.
 * Otherwise successors are built one at a time for the subset returned by
 * {@code chooseIndex()}, for as long as the successor just built is a leaf
 * and fewer than all subsets have been visited. If every successor is a
 * leaf afterwards, {@code pruneEnd()} is called and {@code indeX} is only
 * updated when {@code m_isLeaf} is still false.
 *
 * @param train the training instances this node is built from
 * @param test the hold out instances
 * @param leaf if true, the no-split model is used without selecting a model
 * @throws Exception if something goes wrong
 */
public void buildDecList(Instances train, Instances test, boolean leaf) throws Exception {
  // Start from a clean node (m_test is assigned further down).
  m_train = null;
  m_isLeaf = false;
  m_isEmpty = false;
  m_sons = null;
  indeX = 0;

  final double totalWeight = train.sumOfWeights();
  final NoSplit leafModel = new NoSplit(new Distribution(train));
  m_localModel = leaf ? leafModel : m_toSelectModel.selectModel(train, test);
  m_test = new Distribution(test, m_localModel);

  // Nothing to split on: this node is a leaf.
  if (m_localModel.numSubsets() <= 1) {
    m_isLeaf = true;
    if (Utils.eq(totalWeight, 0)) {
      m_isEmpty = true;
    }
    return;
  }

  final Instances[] trainSubsets = m_localModel.split(train);
  final Instances[] testSubsets = m_localModel.split(test);
  // Drop our references to the unsplit data.
  train = null;
  test = null;
  m_sons = new ClassifierDecList[m_localModel.numSubsets()];

  // round counts how many times chooseIndex() has been consulted.
  for (int round = 1;; round++) {
    final int pick = chooseIndex();
    if (pick == -1) {
      // No index was chosen: create every still-missing successor, passing
      // true as the last argument (presumably the leaf flag).
      for (int k = 0; k < m_sons.length; k++) {
        if (m_sons[k] == null) {
          m_sons[k] = getNewDecList(trainSubsets[k], testSubsets[k], true);
        }
      }
      if (round < 2) {
        // This happened on the very first round: collapse into a leaf.
        m_localModel = leafModel;
        m_isLeaf = true;
        m_sons = null;
        if (Utils.eq(totalWeight, 0)) {
          m_isEmpty = true;
        }
        return;
      }
      break;
    }
    m_sons[pick] = getNewDecList(trainSubsets[pick], testSubsets[pick], false);
    // Stop once all subsets were visited or the new successor is not a leaf.
    if (round >= m_sons.length || !m_sons[pick].m_isLeaf) {
      break;
    }
  }

  // Check if all successors are leaves.
  boolean allLeaves = true;
  for (int k = 0; k < m_sons.length && allLeaves; k++) {
    allLeaves = (m_sons[k] != null) && m_sons[k].m_isLeaf;
  }
  if (allLeaves) {
    pruneEnd();
  }
  if (!allLeaves || !m_isLeaf) {
    indeX = chooseLastIndex();
  }
}
use of cosmos.distribution.v1beta1.Distribution in project umple by umple.
the class ResidualModelSelection method selectModel.
/**
 * Selects split based on residuals for the given dataset.
 *
 * <p>A {@code ResidualSplit} is built for every attribute except the class
 * attribute. Among those that pass {@code checkModel(m_minNumInstances)},
 * the one with the largest {@code entropyGain()} wins. The winning split is
 * rebuilt and returned if its gain reaches {@code m_minInfoGain}; otherwise
 * a {@code NoSplit} over {@code data} is returned. A {@code NoSplit} is also
 * returned right away when {@code data} holds fewer than
 * {@code m_minNumInstances} instances.
 *
 * @param data the instances to find a split for
 * @param dataZs forwarded unchanged to {@code ResidualSplit.buildClassifier}
 * @param dataWs forwarded unchanged to {@code ResidualSplit.buildClassifier}
 * @return the selected split, or a {@code NoSplit} if none qualifies
 * @throws Exception if {@code data} has fewer than two attributes, or if
 *         building a split fails
 */
public final ClassifierSplitModel selectModel(Instances data, double[][] dataZs, double[][] dataWs) throws Exception {
  final int attributeCount = data.numAttributes();
  if (attributeCount < 2) {
    throw new Exception("Can't select Model without non-class attribute");
  }
  if (data.numInstances() < m_minNumInstances) {
    return new NoSplit(new Distribution(data));
  }

  int winner = -1;
  double winnerGain = -Double.MAX_VALUE;

  // Try a split on every attribute other than the class.
  for (int att = 0; att < attributeCount; att++) {
    if (att == data.classIndex()) {
      continue;
    }
    ResidualSplit candidate = new ResidualSplit(att);
    candidate.buildClassifier(data, dataZs, dataWs);
    if (!candidate.checkModel(m_minNumInstances)) {
      continue;
    }
    double candidateGain = candidate.entropyGain();
    if (candidateGain > winnerGain) {
      winnerGain = candidateGain;
      winner = att;
    }
  }

  if (winnerGain >= m_minInfoGain) {
    // Rebuild the winning split and hand it back.
    ResidualSplit chosen = new ResidualSplit(winner);
    chosen.buildClassifier(data, dataZs, dataWs);
    return chosen;
  }

  // Could not find any split with enough information gain.
  return new NoSplit(new Distribution(data));
}
use of cosmos.distribution.v1beta1.Distribution in project umple by umple.
the class ClassifierDecList method buildDecList.
/**
 * Builds the partial tree without hold out set.
 *
 * <p>The node state is reset first. The local model is a {@code NoSplit}
 * over {@code data} when {@code leaf} is set, otherwise whatever
 * {@code m_toSelectModel} selects. A model with at most one subset turns
 * this node into a leaf. Otherwise successors are built one at a time for
 * the subset returned by {@code chooseIndex()}, for as long as the successor
 * just built is a leaf and fewer than all subsets have been visited;
 * finally {@code indeX} is set from {@code chooseLastIndex()}.
 *
 * @param data the instances this node is built from
 * @param leaf if true, the no-split model is used without selecting a model
 * @throws Exception if something goes wrong
 */
public void buildDecList(Instances data, boolean leaf) throws Exception {
  // Start from a clean node.
  m_train = null;
  m_test = null;
  m_isLeaf = false;
  m_isEmpty = false;
  m_sons = null;
  indeX = 0;

  final double weightSum = data.sumOfWeights();
  final NoSplit fallback = new NoSplit(new Distribution(data));
  m_localModel = leaf ? fallback : m_toSelectModel.selectModel(data);

  // Nothing to split on: this node is a leaf.
  if (m_localModel.numSubsets() <= 1) {
    m_isLeaf = true;
    if (Utils.eq(weightSum, 0)) {
      m_isEmpty = true;
    }
    return;
  }

  final Instances[] parts = m_localModel.split(data);
  data = null; // drop our reference to the unsplit data
  m_sons = new ClassifierDecList[m_localModel.numSubsets()];

  int rounds = 0; // how many times chooseIndex() has been consulted
  while (true) {
    rounds++;
    final int chosen = chooseIndex();
    if (chosen == -1) {
      // No index was chosen: create every still-missing successor, passing
      // true as the last argument (presumably the leaf flag).
      for (int s = 0; s < m_sons.length; s++) {
        if (m_sons[s] == null) {
          m_sons[s] = getNewDecList(parts[s], true);
        }
      }
      if (rounds < 2) {
        // This happened on the very first round: collapse into a leaf.
        m_localModel = fallback;
        m_isLeaf = true;
        m_sons = null;
        if (Utils.eq(weightSum, 0)) {
          m_isEmpty = true;
        }
        return;
      }
      break;
    }
    m_sons[chosen] = getNewDecList(parts[chosen], false);
    // Stop once all subsets were visited or the new successor is not a leaf.
    if (rounds >= m_sons.length || !m_sons[chosen].m_isLeaf) {
      break;
    }
  }

  // Choose rule
  indeX = chooseLastIndex();
}
use of cosmos.distribution.v1beta1.Distribution in project umple by umple.
the class ResidualSplit method buildClassifier.
/**
 * Builds the split.
 * Needs the Z/W values of LogitBoost for the set of instances.
 *
 * <p>The data and the Z/W arrays are stored on this object as given (no
 * copies are made here). For a nominal split attribute the number of subsets
 * equals the attribute's number of values and the split point is set to 0.0;
 * otherwise {@code getSplitPoint()} is invoked and two subsets are used.
 *
 * @param data the instances to build the split on
 * @param dataZs the Z values belonging to {@code data}
 * @param dataWs the W values belonging to {@code data}
 * @throws Exception if {@code data} contains no instances
 */
public void buildClassifier(Instances data, double[][] dataZs, double[][] dataWs) throws Exception {
  m_numClasses = data.numClasses();
  m_numInstances = data.numInstances();
  if (m_numInstances == 0) {
    throw new Exception("Can't build split on 0 instances");
  }

  // Keep hold of the data and its Z/W values.
  m_data = data;
  m_dataZs = dataZs;
  m_dataWs = dataWs;
  m_attribute = data.attribute(m_attIndex);

  // Work out the number of subsets (and the split point for non-nominal attributes).
  if (!m_attribute.isNominal()) {
    getSplitPoint();
    m_numSubsets = 2;
  } else {
    m_splitPoint = 0.0;
    m_numSubsets = m_attribute.numValues();
  }

  // Distribution of the data under this split.
  m_distribution = new Distribution(data, this);
}
Aggregations