use of weka.filters.unsupervised.instance.RemoveWithValues in project ambit-mirror by ideaconsult.
the class FilteredWekaModelBuilder method process.
/**
 * Builds a Weka model for {@code trainingData} using the algorithm class named by
 * {@code algorithm.getContent()}.
 * <p>
 * The instantiated object is handled according to its type: a {@link Clusterer} is wrapped
 * in a {@link FilteredClusterer}, a {@link Classifier} in a {@link FilteredClassifier}, and a
 * {@link PrincipalComponents} instance is used directly. A chain of preprocessing filters is
 * assembled and attached to the wrapper (or, for PCA, applied to the data up front). For
 * classifiers, cross-validation and training-set statistics are recorded on the result. The
 * built model is finally passed to {@code serializeModel}.
 *
 * @param algorithm the algorithm descriptor; its content is the class name to load and its
 *                  parameters, if any, are passed to the algorithm as options
 * @return the model description with predictors, dependent and predicted templates set
 * @throws AmbitException    if the algorithm type is not supported, or if filtering, building,
 *                           evaluating or serializing the model fails
 * @throws ResourceException (client error) if the dataset is empty, the class cannot be
 *                           instantiated, or a classifier has no target variable
 */
public ModelQueryResults process(Algorithm algorithm) throws AmbitException {
    List<Filter> filters = new ArrayList<Filter>();
    Instances instances = trainingData;
    if ((instances == null) || (instances.numInstances() == 0) || (instances.numAttributes() == 0))
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "Empty dataset!");

    // Instantiate the algorithm implementation by class name.
    Object weka = null;
    try {
        Class<?> clazz = this.getClass().getClassLoader().loadClass(algorithm.getContent().toString());
        weka = clazz.newInstance();
    } catch (Exception x) {
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, x.getMessage(), x);
    }

    // Use the attribute whose name matches a target URI as the class attribute.
    // The break only leaves the attribute loop, so the last matching target wins.
    if (targetURI != null) {
        for (String t : targetURI) {
            for (int i = 0; i < instances.numAttributes(); i++) {
                if (instances.attribute(i).name().equals(t)) {
                    instances.setClassIndex(i);
                    break;
                }
            }
        }
    }

    fclusterer = null;
    fclassifier = null;
    pca = null;
    if (weka instanceof Clusterer) {
        fclusterer = new FilteredClusterer();
        fclusterer.setClusterer((Clusterer) weka);
    } else if (weka instanceof Classifier) {
        fclassifier = new FilteredClassifier();
        fclassifier.setClassifier((Classifier) weka);
        if (targetURI == null)
            throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "No target variable! " + OpenTox.params.target);
        if (instances.classIndex() < 0)
            throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "No target variable! " + OpenTox.params.target);
        if (weka instanceof IBk) {
            // Default IBk options; user-supplied parameters below may override them.
            // NOTE(review): "-20" is passed as the value of -K - confirm this is intended.
            String[] options = new String[3];
            options[0] = "-K";
            options[1] = "-20";
            options[2] = "-X";
            try {
                ((IBk) weka).setOptions(options);
            } catch (Exception x) {
                // Best effort: keep the IBk defaults, but do not hide the failure.
                Context.getCurrentLogger().warning("Error setting default IBk options: " + x.getMessage());
            }
        }
    } else if (weka instanceof PrincipalComponents) {
        pca = (PrincipalComponents) weka;
    } else
        throw new AmbitException(String.format("Unknown algorithm %s", algorithm.toString()));

    // Apply user-supplied algorithm parameters; on failure fall back to the defaults.
    String[] prm = algorithm.getParametersAsArray();
    if (prm != null)
        try {
            if (fclassifier != null)
                fclassifier.getClassifier().setOptions(prm);
            else if (pca != null)
                pca.setOptions(prm);
            else if (fclusterer != null) {
                // setOptions is looked up reflectively. The lookup must name the String[]
                // parameter, and the array must be passed as ONE argument (hence the cast),
                // otherwise the call can never succeed.
                fclusterer.getClusterer().getClass().getMethod("setOptions", String[].class)
                        .invoke(fclusterer.getClusterer(), (Object) prm);
            }
        } catch (Exception x) {
            Context.getCurrentLogger().warning("Error setting algorithm parameters, assuming defaults" + x.getMessage());
        }

    try {
        // remove firstCompoundID attribute
        String[] options = new String[2];
        options[0] = "-R";
        options[1] = "1";
        Remove remove = new Remove();
        remove.setOptions(options);
        filters.add(remove);
    } catch (Exception x) {
        throw new AmbitException(x);
    }
    try {
        // replace missing values if the algorithm cannot handle them
        if (!hasCapability(Capability.MISSING_VALUES)) {
            ReplaceMissingValues missing = new ReplaceMissingValues();
            // can't make it working with RemoveWithValues...
            String[] options = new String[1];
            options[0] = "-M";
            missing.setOptions(options);
            filters.add(missing);
        }
    } catch (Exception x) {
        throw new AmbitException(x);
    }
    if (instances.classIndex() >= 0)
        try {
            // The filters below are given instances.classIndex() as an attribute index.
            // NOTE(review): presumably the 0-based class index doubles as the 1-based index
            // of the class attribute once the first attribute has been removed by the
            // Remove filter added above - confirm.
            // num/nom support
            if (instances.attribute(instances.classIndex()).isNominal()) {
                if (!hasCapability(Capability.NOMINAL_CLASS)) {
                    if (hasCapability(Capability.BINARY_CLASS)) {
                        // nominal 2 binary
                        NominalToBinary nom2bin = new NominalToBinary();
                        String[] options = new String[2];
                        options[0] = "-R";
                        options[1] = Integer.toString(instances.classIndex());
                        nom2bin.setOptions(options);
                        filters.add(nom2bin);
                    }
                }
            } else if (instances.attribute(instances.classIndex()).isNumeric()) {
                if (!hasCapability(Capability.NUMERIC_CLASS)) {
                    if (hasCapability(Capability.NOMINAL_CLASS)) {
                        // numeric to nominal, i.e. Discretize
                        Discretize num2nom = new Discretize();
                        String[] options = new String[2];
                        options[0] = "-R";
                        options[1] = Integer.toString(instances.classIndex());
                        num2nom.setOptions(options);
                        filters.add(num2nom);
                    }
                }
                // else all is well
            } else if (instances.attribute(instances.classIndex()).isString()) {
                if (hasCapability(Capability.NOMINAL_CLASS)) {
                    StringToNominal str2nom = new StringToNominal();
                    String[] options = new String[2];
                    options[0] = "-R";
                    options[1] = Integer.toString(instances.classIndex());
                    str2nom.setOptions(options);
                    filters.add(str2nom);
                }
            }
            if (!hasCapability(Capability.MISSING_CLASS_VALUES)) {
                // drop instances whose class value is missing
                RemoveWithValues missing = new RemoveWithValues();
                String[] options = new String[3];
                options[0] = "-M";
                options[1] = "-C";
                options[2] = Integer.toString(instances.classIndex());
                missing.setOptions(options);
                filters.add(missing);
            }
            if (fclassifier == null) {
                // not a classifier: ignore the class attribute by removing it
                String[] options = new String[2];
                options[0] = "-R";
                options[1] = Integer.toString(instances.classIndex());
                Remove remove = new Remove();
                remove.setOptions(options);
                filters.add(remove);
            }
        } catch (Exception x) {
            throw new AmbitException(x);
        }
    try {
        // all besides the class (if set!)
        filters.add(new Standardize());
    } catch (Exception x) {
        throw new AmbitException(x);
    }

    // now set the filters
    MultiFilter filter = new MultiFilter();
    filter.setFilters(filters.toArray(new Filter[filters.size()]));
    Instances newInstances = instances;
    if (fclassifier != null)
        fclassifier.setFilter(filter);
    else if (fclusterer != null)
        fclusterer.setFilter(filter);
    else {
        // PCA has no filtered wrapper here, so the data is filtered up front.
        try {
            filter.setInputFormat(instances);
            newInstances = Filter.useFilter(instances, filter);
        } catch (Exception x) {
            throw new AmbitException(x);
        }
    }

    // Model name: timestamp + random UUID + algorithm class name.
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyMMddhhmmss");
    Date timestamp = new Date(System.currentTimeMillis());
    String name = String.format("%s.%s.%s", simpleDateFormat.format(timestamp), UUID.randomUUID().toString(), weka.getClass().getName());
    ModelQueryResults m = new ModelQueryResults();
    m.setParameters(parameters);
    m.setId(null);
    m.setContentMediaType(AlgorithmFormat.WEKA.getMediaType());
    m.setName(name);
    m.setAlgorithm(alg_reporter.getURI(algorithm));
    AlgorithmURIReporter r = new AlgorithmURIReporter();
    LiteratureEntry entry = new LiteratureEntry(name, algorithm == null ? weka.getClass().getName() : r.getURI(applicationRootReference.toString(), algorithm));
    LiteratureEntry prediction = new LiteratureEntry(m.getName(), model_reporter.getURI(applicationRootReference.toString(), m));
    prediction.setType(_type.Model);
    Template predictors = null;
    Template dependent = null;
    PredictedVarsTemplate predicted = null;
    if (fclusterer != null) {
        try {
            fclusterer.buildClusterer(newInstances);
        } catch (Exception x) {
            throw new AmbitException(x);
        }
        predicted = new PredictedVarsTemplate(name + "#Predicted");
        Property property = new Property("Cluster", prediction);
        property.setNominal(true);
        predicted.add(property);
        dependent = new Template("Empty");
        predictors = new Template(name + "#Independent");
        for (int i = 0; i < newInstances.numAttributes(); i++) {
            property = createPropertyFromReference(new Reference(newInstances.attribute(i).name()), entry, referer);
            property.setOrder(i + 1);
            predictors.add(property);
        }
    } else if (fclassifier != null) {
        try {
            Context.getCurrentLogger().fine(String.valueOf(fclassifier.getClassifier().getCapabilities()));
            fclassifier.getCapabilities().testWithFail(newInstances);
        } catch (Exception x) {
            throw new AmbitException(x);
        }
        try {
            // if (classifier instanceof LinearRegression) //don't do feature selection!
            // classifier.setOptions(new String[] {"-S","1"});
            StringBuilder evaluationString = new StringBuilder();
            EvaluationStats<String> stats = new EvaluationStats<String>(EVType.crossvalidation, null);
            Evaluation eval = new Evaluation(newInstances);
            if (newInstances.numInstances() > 20) {
                eval.crossValidateModel(fclassifier, newInstances, 10, new Random(1));
                evaluationString.append("Crossvalidation 10 folds\n");
            } else {
                eval.crossValidateModel(fclassifier, newInstances, 2, new Random(1));
                evaluationString.append("Crossvalidation 2 folds\n");
            }
            // Each statistic below is collected on a best-effort basis: a failure of one
            // report or measure is skipped so the remaining ones are still recorded.
            try {
                evaluationString.append(eval.toSummaryString());
                evaluationString.append("\n");
            } catch (Exception ignored) {
                // summary unavailable; skip
            }
            try {
                evaluationString.append(eval.toClassDetailsString());
                evaluationString.append("\n");
                evaluationString.append(eval.toMatrixString());
                evaluationString.append("\n");
            } catch (Exception ignored) {
                // class details / confusion matrix unavailable; skip
            }
            try {
                evaluationString.append(eval.weightedAreaUnderROC());
            } catch (Exception ignored) {
                // ROC area unavailable; skip
            }
            try {
                stats.setMAE(eval.meanAbsoluteError());
            } catch (Exception ignored) {
                // measure unavailable; skip
            }
            try {
                stats.setRMSE(eval.rootMeanSquaredError());
            } catch (Exception ignored) {
                // measure unavailable; skip
            }
            try {
                stats.setPctCorrect(eval.pctCorrect());
                stats.setPctInCorrect(eval.pctIncorrect());
            } catch (Exception ignored) {
                // measure unavailable; skip
            }
            stats.setContent(evaluationString.toString());
            m.addEvaluation(stats);

            // Build the final model on all data and evaluate it on the training set.
            stats = new EvaluationStats<String>(EVType.evaluation_training, null);
            evaluationString = new StringBuilder();
            fclassifier.buildClassifier(newInstances);
            eval = new Evaluation(newInstances);
            eval.evaluateModel(fclassifier, newInstances);
            try {
                evaluationString.append("\nTraining dataset statistics\n");
                evaluationString.append(eval.toSummaryString());
                evaluationString.append("\n");
            } catch (Exception ignored) {
                // summary unavailable; skip
            }
            try {
                evaluationString.append(eval.toMatrixString());
                evaluationString.append("\n");
            } catch (Exception ignored) {
                // confusion matrix unavailable; skip
            }
            try {
                stats.setMAE(eval.meanAbsoluteError());
            } catch (Exception ignored) {
                // measure unavailable; skip
            }
            try {
                stats.setRMSE(eval.rootMeanSquaredError());
            } catch (Exception ignored) {
                // measure unavailable; skip
            }
            try {
                stats.setPctCorrect(eval.pctCorrect());
                stats.setPctInCorrect(eval.pctIncorrect());
            } catch (Exception ignored) {
                // measure unavailable; skip
            }
            stats.setContent(evaluationString.toString());
            m.addEvaluation(stats);
        } catch (Exception x) {
            throw new AmbitException(x);
        }
        dependent = new Template(name + "#Dependent");
        Property property = createPropertyFromReference(new Reference(newInstances.attribute(newInstances.classIndex()).name()), entry, referer);
        dependent.add(property);
        predicted = new PredictedVarsTemplate(name + "#Predicted");
        // The predicted property mirrors the dependent one but refers to the model.
        Property predictedProperty = new Property(property.getName(), prediction);
        predictedProperty.setLabel(property.getLabel());
        predictedProperty.setUnits(property.getUnits());
        predictedProperty.setClazz(property.getClazz());
        predictedProperty.setNominal(property.isNominal());
        predicted.add(predictedProperty);
        predictedProperty.setEnabled(true);
        if (supportsDistribution(fclassifier)) {
            // Additional confidence feature annotated as "confidenceOf" the prediction.
            Property confidenceProperty = new Property(String.format("%s Confidence", property.getName()), prediction);
            confidenceProperty.setLabel(Property.opentox_ConfidenceFeature);
            confidenceProperty.setUnits("");
            confidenceProperty.setClazz(Number.class);
            confidenceProperty.setEnabled(true);
            PropertyAnnotation<Property> a = new PropertyAnnotation<Property>();
            a.setType(OT.OTClass.ModelConfidenceFeature.name());
            a.setPredicate(OT.OTProperty.confidenceOf.name());
            a.setObject(predictedProperty);
            PropertyAnnotations aa = new PropertyAnnotations();
            aa.add(a);
            confidenceProperty.setAnnotations(aa);
            predicted.add(confidenceProperty);
        }
        predictors = new Template(name + "#Independent");
        for (int i = 0; i < newInstances.numAttributes(); i++) {
            if ("CompoundURI".equals(newInstances.attribute(i).name()))
                continue;
            if (newInstances.classIndex() == i)
                continue;
            property = createPropertyFromReference(new Reference(newInstances.attribute(i).name()), entry, referer);
            property.setOrder(i + 1);
            predictors.add(property);
        }
    } else if (pca != null) {
        try {
            pca.setVarianceCovered(1.0);
            pca.buildEvaluator(newInstances);
        } catch (Exception x) {
            throw new AmbitException(x);
        }
        Property property;
        dependent = new Template("Empty");
        predictors = new Template(name + "#Independent");
        for (int i = 0; i < newInstances.numAttributes(); i++) {
            if ("CompoundURI".equals(newInstances.attribute(i).name()))
                continue;
            if (newInstances.classIndex() == i)
                continue;
            property = createPropertyFromReference(new Reference(newInstances.attribute(i).name()), entry, referer);
            property.setOrder(i + 1);
            predictors.add(property);
        }
        // One predicted "PCA_<n>" property per non-class attribute.
        predicted = new PredictedVarsTemplate(name + "#Predicted");
        for (int i = 0; i < newInstances.numAttributes(); i++) {
            if (newInstances.classIndex() == i)
                continue;
            property = createPropertyFromReference(new Reference(String.format("PCA_%d", i + 1)), entry, referer);
            property.setClazz(Number.class);
            property.setOrder(i + 1);
            predicted.add(property);
        }
    }
    m.setPredictors(predictors);
    m.setDependent(dependent);
    m.setPredicted(predicted);
    try {
        // Serialize whichever model object was built: clusterer, classifier or PCA.
        serializeModel(fclusterer == null ? fclassifier == null ? pca : fclassifier : fclusterer, newInstances, m);
    } catch (Exception x) {
        throw new AmbitException(x);
    }
    return m;
}
use of weka.filters.unsupervised.instance.RemoveWithValues in project ambit-mirror by ideaconsult.
the class CoverageModelBuilder method process.
/**
 * Builds a data-coverage model from {@code trainingData}.
 * <p>
 * Instances with missing values are filtered out where possible, the values of all
 * attributes except the first are copied into a matrix, and the {@link DataCoverage}
 * implementation named by {@code algorithm.getContent()} is built on that matrix. The
 * resulting model is described by two predicted properties (metric and domain) and one
 * predictor per copied attribute, then serialized.
 *
 * @param algorithm the algorithm descriptor; its content is the coverage class name
 * @return the model description, with the coverage-serialized media type
 * @throws AmbitException    if serializing the model fails with an I/O error
 * @throws ResourceException (client error) if the dataset is empty, a value is missing
 *                           or unreadable, or the coverage class cannot be instantiated
 */
public ModelQueryResults process(Algorithm algorithm) throws AmbitException {
    Instances instances = trainingData;
    boolean nothingToLearn = (instances == null)
            || (instances.numInstances() == 0)
            || (instances.numAttributes() == 0);
    if (nothingToLearn) {
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "Empty dataset!");
    }

    // Try to drop instances with missing values; keep the data as is if filtering fails.
    try {
        RemoveWithValues dropMissing = new RemoveWithValues();
        dropMissing.setOptions(new String[] { "-M" });
        dropMissing.setInputFormat(instances);
        instances = Filter.useFilter(instances, dropMissing);
    } catch (Exception ignored) {
        // use unfiltered
    }

    // Copy every attribute but the first into the matrix (column = attribute index - 1).
    final int rowCount = instances.numInstances();
    final int columnCount = instances.numAttributes() - 1;
    Matrix matrix = new Matrix(rowCount, columnCount);
    for (int row = 0; row < rowCount; row++) {
        for (int attr = 1; attr <= columnCount; attr++) {
            try {
                double cell = instances.instance(row).value(attr);
                if (Double.isNaN(cell)) {
                    throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST,
                            String.format("Missing value %s in record %s", instances.attribute(attr), instances.instance(row)));
                }
                matrix.set(row, attr - 1, cell);
            } catch (ResourceException missingValue) {
                throw missingValue;
            } catch (Exception failure) {
                throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, failure.getMessage(), failure);
            }
        }
    }

    // Instantiate the coverage implementation by class name.
    DataCoverage coverage = null;
    try {
        Class coverageClass = this.getClass().getClassLoader().loadClass(algorithm.getContent().toString());
        coverage = (DataCoverage) coverageClass.newInstance();
    } catch (Exception failure) {
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, failure.getMessage(), failure);
    }

    String name = String.format("%s.%s", UUID.randomUUID().toString(), coverage.getName());
    ModelQueryResults m = new ModelQueryResults();
    m.setParameters(parameters);
    m.setId(null);
    m.setContentMediaType(AlgorithmFormat.WEKA.getMediaType());
    m.setName(name);
    m.setAlgorithm(alg_reporter.getURI(algorithm));

    AlgorithmURIReporter uriReporter = new AlgorithmURIReporter();
    String entryUrl = (algorithm == null)
            ? coverage.getClass().getName()
            : uriReporter.getURI(applicationRootReference.toString(), algorithm);
    LiteratureEntry entry = new LiteratureEntry(name, entryUrl);
    LiteratureEntry prediction = new LiteratureEntry(m.getName(), model_reporter.getURI(applicationRootReference.toString(), m));
    prediction.setType(_type.Model);

    // coverage cannot be null here: the instantiation above either assigned it or threw.
    coverage.build(matrix);

    PredictedVarsTemplate predicted = new PredictedVarsTemplate(name + "#ApplicabilityDomain");
    Property metric = new Property(coverage.getMetricName(), prediction);
    metric.setEnabled(true);
    metric.setLabel(String.format("http://www.opentox.org/api/1.1#%s", coverage.getMetricName()));
    predicted.add(metric);

    // The domain property is a confidence feature, optionally tied to a predicted feature.
    Property domain = new Property(coverage.getDomainName(), prediction);
    domain.setLabel(Property.opentox_ConfidenceFeature);
    domain.setClazz(Number.class);
    domain.setEnabled(true);
    if (predictedFeatureURI != null) {
        PropertyAnnotation<String> confidenceOf = new PropertyAnnotation<String>();
        confidenceOf.setType(OT.OTClass.ModelConfidenceFeature.name());
        confidenceOf.setPredicate(OT.OTProperty.confidenceOf.name());
        confidenceOf.setObject(predictedFeatureURI);
        PropertyAnnotations annotations = new PropertyAnnotations();
        annotations.add(confidenceOf);
        domain.setAnnotations(annotations);
    }
    predicted.add(domain);

    Template dependent = new Template("Empty");
    Template predictors = new Template(name + "#Independent");
    for (int attr = 1; attr < instances.numAttributes(); attr++) {
        Property predictor = createPropertyFromReference(new Reference(instances.attribute(attr).name()), entry, referer);
        predictor.setOrder(attr + 1);
        predictors.add(predictor);
    }

    m.setPredictors(predictors);
    m.setDependent(dependent);
    m.setPredicted(predicted);
    try {
        serializeModel(coverage, instances, m);
    } catch (IOException ioFailure) {
        throw new AmbitException(ioFailure);
    }
    m.setContentMediaType(AlgorithmFormat.COVERAGE_SERIALIZED.getMediaType());
    return m;
}
use of weka.filters.unsupervised.instance.RemoveWithValues in project umple by umple.
the class MultiClassClassifier method buildClassifier.
/**
 * Trains the base classifier copies that together handle a multi-class problem.
 * <p>
 * With at most two classes a single copy is trained on the data as is. For the
 * 1-against-1 method one copy is trained per pair of classes on the instances of that
 * pair only. Otherwise a code (exhaustive, random or standard) determines the number of
 * copies, and each copy is trained on the data with the class turned into an indicator.
 *
 * @param insts the training data.
 * @throws Exception if a classifier can't be built
 */
public void buildClassifier(Instances insts) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(insts);

    // remember whether we started with no instances at all - could be incremental
    final boolean startedEmpty = (insts.numInstances() == 0);

    // work on a copy from which instances with a missing class are removed
    insts = new Instances(insts);
    insts.deleteWithMissingClass();

    if (m_Classifier == null) {
        throw new Exception("No base classifier has been set!");
    }

    m_ZeroR = new ZeroR();
    m_ZeroR.buildClassifier(insts);
    m_TwoClassDataset = null;

    int modelCount = insts.numClasses();
    if (modelCount <= 2) {
        // nothing to decompose: a single classifier, no class filters
        m_Classifiers = AbstractClassifier.makeCopies(m_Classifier, 1);
        m_Classifiers[0].buildClassifier(insts);
        m_ClassFilters = null;
    } else if (m_Method == METHOD_1_AGAINST_1) {
        // enumerate all class pairs (first < second)
        ArrayList<int[]> pairs = new ArrayList<int[]>();
        for (int first = 0; first < insts.numClasses(); first++) {
            for (int second = first + 1; second < insts.numClasses(); second++) {
                pairs.add(new int[] { first, second });
            }
        }
        modelCount = pairs.size();
        m_Classifiers = AbstractClassifier.makeCopies(m_Classifier, modelCount);
        m_ClassFilters = new Filter[modelCount];
        m_SumOfWeights = new double[modelCount];

        // one classifier per pair, trained on the instances of those two classes only
        for (int k = 0; k < modelCount; k++) {
            RemoveWithValues pairFilter = new RemoveWithValues();
            pairFilter.setAttributeIndex("" + (insts.classIndex() + 1));
            pairFilter.setModifyHeader(true);
            pairFilter.setInvertSelection(true);
            pairFilter.setNominalIndicesArr(pairs.get(k));

            // the filter's input format is a header copy with the class unset
            Instances header = new Instances(insts, 0);
            header.setClassIndex(-1);
            pairFilter.setInputFormat(header);

            Instances pairData = Filter.useFilter(insts, pairFilter);
            boolean trainable = pairData.numInstances() > 0 || startedEmpty;
            if (!trainable) {
                m_Classifiers[k] = null;
                m_ClassFilters[k] = null;
                continue;
            }
            pairData.setClassIndex(insts.classIndex());
            m_Classifiers[k].buildClassifier(pairData);
            m_ClassFilters[k] = pairFilter;
            m_SumOfWeights[k] = pairData.sumOfWeights();
        }

        // construct a two-class header version of the dataset
        m_TwoClassDataset = new Instances(insts, 0);
        final int twoClassIndex = m_TwoClassDataset.classIndex();
        m_TwoClassDataset.setClassIndex(-1);
        ArrayList<String> binaryLabels = new ArrayList<String>();
        binaryLabels.add("class0");
        binaryLabels.add("class1");
        m_TwoClassDataset.replaceAttributeAt(new Attribute("class", binaryLabels), twoClassIndex);
        m_TwoClassDataset.setClassIndex(twoClassIndex);
    } else {
        // use error correcting code style methods
        Code code = null;
        switch (m_Method) {
            case METHOD_ERROR_EXHAUSTIVE:
                code = new ExhaustiveCode(modelCount);
                break;
            case METHOD_ERROR_RANDOM:
                code = new RandomCode(modelCount, (int) (modelCount * m_RandomWidthFactor), insts);
                break;
            case METHOD_1_AGAINST_ALL:
                code = new StandardCode(modelCount);
                break;
            default:
                throw new Exception("Unrecognized correction code type");
        }
        modelCount = code.size();
        m_Classifiers = AbstractClassifier.makeCopies(m_Classifier, modelCount);
        m_ClassFilters = new MakeIndicator[modelCount];

        // one classifier per code position, trained on indicator-class data
        for (int k = 0; k < m_Classifiers.length; k++) {
            MakeIndicator indicator = new MakeIndicator();
            m_ClassFilters[k] = indicator;
            indicator.setAttributeIndex("" + (insts.classIndex() + 1));
            indicator.setValueIndices(code.getIndices(k));
            indicator.setNumeric(false);
            indicator.setInputFormat(insts);
            Instances indicatorData = Filter.useFilter(insts, m_ClassFilters[k]);
            m_Classifiers[k].buildClassifier(indicatorData);
        }
    }
    m_ClassAttribute = insts.classAttribute();
}
use of weka.filters.unsupervised.instance.RemoveWithValues in project umple by umple.
the class MultiClassClassifier method toString.
/**
 * Describes the built model: one section per base classifier, preceded by the class
 * pair or indicator values it was trained on where a class filter is present.
 *
 * @return a string representation of the classifier
 */
public String toString() {
    if (m_Classifiers == null) {
        return "MultiClassClassifier: No model built yet.";
    }
    StringBuffer out = new StringBuffer("MultiClassClassifier\n\n");
    for (int idx = 0; idx < m_Classifiers.length; idx++) {
        out.append("Classifier ").append(idx + 1);

        // slots without a classifier were skipped during training
        if (m_Classifiers[idx] == null) {
            out.append(" Skipped (no training examples)\n");
            continue;
        }

        boolean hasFilter = (m_ClassFilters != null) && (m_ClassFilters[idx] != null);
        if (hasFilter && m_ClassFilters[idx] instanceof RemoveWithValues) {
            // pairwise classifier: print the two class indices, 1-based
            RemoveWithValues pairFilter = (RemoveWithValues) m_ClassFilters[idx];
            Range selected = new Range(pairFilter.getNominalIndices());
            selected.setUpper(m_ClassAttribute.numValues());
            int[] classes = selected.getSelection();
            out.append(", ").append(classes[0] + 1).append(" vs ").append(classes[1] + 1);
        } else if (hasFilter && m_ClassFilters[idx] instanceof MakeIndicator) {
            MakeIndicator indicator = (MakeIndicator) m_ClassFilters[idx];
            out.append(", using indicator values: ");
            out.append(indicator.getValueRange());
        }

        out.append('\n');
        out.append(m_Classifiers[idx].toString()).append("\n\n");
    }
    return out.toString();
}
Aggregations