Search in sources:

Example 1 with Standardize

use of weka.filters.unsupervised.attribute.Standardize in project umple by umple.

The class SMO, method buildClassifier:

/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems: one BinarySMO is trained for each
 * unordered pair of class values, on the union of those two classes'
 * training subsets.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {
    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);
        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        /* Removes all the instances with weight equal to 0.
       MUST be done since condition (8) of Keerthi's paper 
       is made with the assertion Ci > 0 (See equation (3a). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }
    // Missing values are only replaced when checks are on; with checks turned
    // off m_Missing stays null, so no missing-value filter is applied at
    // prediction time either (callers presumably guarantee clean data --
    // TODO confirm against the m_checksTurnedOff contract).
    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        // Determine whether all predictor attributes are numeric; when checks
        // are turned off the scan is skipped and numeric-only data is assumed.
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }
        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }
    // Optional attribute standardization/normalization; the fitted filter is
    // kept in m_Filter so the same transformation can be applied to test data.
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }
    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    // A PolyKernel with exponent 1.0 is a plain dot product; presumably this
    // flag enables a linear-kernel fast path in BinarySMO -- TODO confirm.
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);
    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    // Trim each subset's backing storage to its actual size.
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }
    // Build the binary classifiers: one per pair (i, j) with i < j, so only
    // the upper triangle of m_classifiers is populated.
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            // Each binary problem gets its own copy of the kernel.
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            // Shuffle with the seeded RNG so training order is reproducible.
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
Also used : Normalize(weka.filters.unsupervised.attribute.Normalize) Instance(weka.core.Instance) DenseInstance(weka.core.DenseInstance) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) Instances(weka.core.Instances) Standardize(weka.filters.unsupervised.attribute.Standardize) Random(java.util.Random) PolyKernel(weka.classifiers.functions.supportVector.PolyKernel) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Example 2 with Standardize

use of weka.filters.unsupervised.attribute.Standardize in project umple by umple.

The class SMOreg, method buildClassifier:

/**
 * Method for building the classifier.
 *
 * Cleans the training data, applies missing-value replacement and (if
 * needed) nominal-to-binary conversion, optionally standardizes or
 * normalizes the data including the class attribute, computes the affine
 * back-transformation (m_x0, m_x1) from filtered class values to the
 * original scale, and finally delegates optimization to m_optimizer.
 *
 * @param instances the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    // Removes all the instances with weight equal to 0.
    // MUST be done since condition (8) of Keerthi's paper
    // is made with the assertion Ci > 0 (See equation (3a).
    Instances data = new Instances(instances, 0);
    for (int i = 0; i < instances.numInstances(); i++) {
        if (instances.instance(i).weight() > 0) {
            data.add(instances.instance(i));
        }
    }
    if (data.numInstances() == 0) {
        throw new Exception("No training instances left after removing " + "instance with either a weight null or a missing class!");
    }
    instances = data;
    // Record whether any nominal predictors are present; used below to decide
    // whether the NominalToBinary filter is needed.
    m_onlyNumeric = true;
    for (int i = 0; i < instances.numAttributes(); i++) {
        if (i != instances.classIndex()) {
            if (!instances.attribute(i).isNumeric()) {
                m_onlyNumeric = false;
                break;
            }
        }
    }
    m_Missing = new ReplaceMissingValues();
    m_Missing.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_Missing);
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        if (!m_onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(instances);
            instances = Filter.useFilter(instances, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }
    // retrieve two different class values used to determine filter transformation
    double y0 = instances.instance(0).classValue();
    int index = 1;
    while (index < instances.numInstances() && instances.instance(index).classValue() == y0) {
        index++;
    }
    if (index == instances.numInstances()) {
        // we don't want to deal with this, too much hassle
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = instances.instance(index).classValue();
    // apply filters; setIgnoreClass(true) makes them transform the class
    // attribute as well, hence the back-transformation computed below
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        ((Standardize) m_Filter).setIgnoreClass(true);
        m_Filter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        ((Normalize) m_Filter).setIgnoreClass(true);
        m_Filter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_Filter);
    } else {
        m_Filter = null;
    }
    if (m_Filter != null) {
        double z0 = instances.instance(0).classValue();
        double z1 = instances.instance(index).classValue();
        // The filter applies an affine map with non-zero scale (the class
        // values are guaranteed not all identical by the check above, so the
        // std/range used by the filter is positive); hence y0 != y1 implies
        // z0 != z1 and this division is safe.
        m_x1 = (y0 - y1) / (z0 - z1);
        // intercept: m_x0 = y0 - m_x1 * z0 (equivalently y1 - m_x1 * z1), so
        // original = m_x0 + m_x1 * filtered
        m_x0 = (y0 - m_x1 * z0);
    } else {
        // no filtering: identity back-transformation
        m_x1 = 1.0;
        m_x0 = 0.0;
    }
    m_optimizer.setSMOReg(this);
    m_optimizer.buildClassifier(instances);
}
Also used : Instances(weka.core.Instances) Standardize(weka.filters.unsupervised.attribute.Standardize) Normalize(weka.filters.unsupervised.attribute.Normalize) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Example 3 with Standardize

use of weka.filters.unsupervised.attribute.Standardize in project ambit-mirror by ideaconsult.

The class FilteredWekaModelBuilder, method process:

/**
 * Builds a Weka model (clusterer, classifier or principal-components
 * analysis) for the given algorithm on the training data. A common
 * preprocessing filter chain is assembled in front of the learner:
 * removal of the compound-id attribute, missing-value handling, class
 * type conversion, optional removal of missing class values, and
 * standardization.
 *
 * @param algorithm descriptor whose content is the Weka class name to instantiate
 * @return populated model metadata, including evaluation statistics
 * @throws AmbitException on empty data, unknown algorithm or build failure
 */
public ModelQueryResults process(Algorithm algorithm) throws AmbitException {
    List<Filter> filters = new ArrayList<Filter>();
    Instances instances = trainingData;
    if ((instances == null) || (instances.numInstances() == 0) || (instances.numAttributes() == 0))
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "Empty dataset!");
    // Instantiate the learner reflectively from the algorithm's class name.
    Object weka = null;
    try {
        Class<?> clazz = this.getClass().getClassLoader().loadClass(algorithm.getContent().toString());
        weka = clazz.getDeclaredConstructor().newInstance();
    } catch (Exception x) {
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, x.getMessage(), x);
    }
    // Use the first attribute whose name matches one of the target URIs as the class.
    if (targetURI != null)
        for (String t : targetURI) for (int i = 0; i < instances.numAttributes(); i++) if (instances.attribute(i).name().equals(t)) {
            instances.setClassIndex(i);
            break;
        }
    fclusterer = null;
    fclassifier = null;
    pca = null;
    if (weka instanceof Clusterer) {
        fclusterer = new FilteredClusterer();
        fclusterer.setClusterer((Clusterer) weka);
    } else if (weka instanceof Classifier) {
        fclassifier = new FilteredClassifier();
        fclassifier.setClassifier((Classifier) weka);
        if (targetURI == null)
            throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "No target variable! " + OpenTox.params.target);
        if (instances.classIndex() < 0)
            throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "No target variable! " + OpenTox.params.target);
        if (weka instanceof IBk) {
            // Let IBk cross-validate (-X) the number of neighbours (-K).
            // NOTE(review): "-20" looks odd as a -K value -- confirm intended.
            String[] options = new String[3];
            options[0] = "-K";
            options[1] = "-20";
            options[2] = "-X";
            try {
                ((IBk) weka).setOptions(options);
            } catch (Exception x) {
                // ignored: fall back to IBk defaults
            }
        }
    } else if (weka instanceof PrincipalComponents) {
        pca = (PrincipalComponents) weka;
    } else
        throw new AmbitException(String.format("Unknown algorithm %s", algorithm.toString()));
    // Forward user-supplied algorithm parameters to the learner, if any.
    String[] prm = algorithm.getParametersAsArray();
    if (prm != null)
        try {
            if (fclassifier != null)
                fclassifier.getClassifier().setOptions(prm);
            else if (pca != null)
                pca.setOptions(prm);
            else if (fclusterer != null) {
                // Clusterer has no setOptions in its interface; call it
                // reflectively when the concrete class provides one.
                // Fixed: the method must be looked up with its String[]
                // parameter type and the options passed as a single argument;
                // the previous no-arg lookup always failed, so clusterer
                // parameters were silently ignored.
                fclusterer.getClusterer().getClass().getMethod("setOptions", String[].class).invoke(fclusterer.getClusterer(), new Object[] { prm });
            }
        } catch (Exception x) {
            Context.getCurrentLogger().warning("Error setting algorithm parameters, assuming defaults" + x.getMessage());
        }
    try {
        // remove firstCompoundID attribute
        String[] options = new String[2];
        options[0] = "-R";
        options[1] = "1";
        Remove remove = new Remove();
        remove.setOptions(options);
        filters.add(remove);
    } catch (Exception x) {
        throw new AmbitException(x);
    }
    try {
        // replace missing values when the learner cannot handle them itself
        if (!hasCapability(Capability.MISSING_VALUES)) {
            ReplaceMissingValues missing = new ReplaceMissingValues();
            // can't make it working with RemoveWithValues...
            String[] options = new String[1];
            options[0] = "-M";
            missing.setOptions(options);
            filters.add(missing);
        }
    } catch (Exception x) {
        throw new AmbitException(x);
    }
    // Convert the class attribute to a type the learner supports.
    if (instances.classIndex() >= 0)
        try {
            // num/nom support
            if (instances.attribute(instances.classIndex()).isNominal()) {
                if (!hasCapability(Capability.NOMINAL_CLASS)) {
                    if (hasCapability(Capability.BINARY_CLASS)) {
                        // nominal 2 binary
                        NominalToBinary nom2bin = new NominalToBinary();
                        String[] options = new String[2];
                        options[0] = "-R";
                        options[1] = Integer.toString(instances.classIndex());
                        nom2bin.setOptions(options);
                        filters.add(nom2bin);
                    }
                }
            } else if (instances.attribute(instances.classIndex()).isNumeric()) {
                if (!hasCapability(Capability.NUMERIC_CLASS)) {
                    if (hasCapability(Capability.NOMINAL_CLASS)) {
                        // numeric to nominal, i.e. Discretize
                        Discretize num2nom = new Discretize();
                        String[] options = new String[2];
                        options[0] = "-R";
                        options[1] = Integer.toString(instances.classIndex());
                        num2nom.setOptions(options);
                        filters.add(num2nom);
                    }
                }
            // else all is well
            } else if (instances.attribute(instances.classIndex()).isString()) {
                if (hasCapability(Capability.NOMINAL_CLASS)) {
                    StringToNominal str2nom = new StringToNominal();
                    String[] options = new String[2];
                    options[0] = "-R";
                    options[1] = Integer.toString(instances.classIndex());
                    str2nom.setOptions(options);
                    filters.add(str2nom);
                }
            }
            // NOTE(review): Weka filter "-R" ranges are 1-based while
            // classIndex() is 0-based -- confirm the indices passed above
            // target the intended attribute.
            if (!hasCapability(Capability.MISSING_CLASS_VALUES)) {
                RemoveWithValues missing = new RemoveWithValues();
                String[] options = new String[3];
                options[0] = "-M";
                options[1] = "-C";
                options[2] = Integer.toString(instances.classIndex());
                missing.setOptions(options);
                filters.add(missing);
            }
            if (fclassifier == null) {
                // clusterer, ignore the class attr
                try {
                    // drop the class attribute itself from the input
                    String[] options = new String[2];
                    options[0] = "-R";
                    options[1] = Integer.toString(instances.classIndex());
                    Remove remove = new Remove();
                    remove.setOptions(options);
                    filters.add(remove);
                } catch (Exception x) {
                    throw new AmbitException(x);
                }
            }
        } catch (Exception x) {
            throw new AmbitException(x);
        }
    try {
        // all besides the class (if set!)
        filters.add(new Standardize());
    } catch (Exception x) {
        throw new AmbitException(x);
    }
    // now set the filters
    MultiFilter filter = new MultiFilter();
    filter.setFilters(filters.toArray(new Filter[filters.size()]));
    Instances newInstances = instances;
    if (fclassifier != null)
        fclassifier.setFilter(filter);
    else if (fclusterer != null)
        fclusterer.setFilter(filter);
    else {
        // PCA has no filtered wrapper: apply the filter chain up front.
        try {
            filter.setInputFormat(instances);
            newInstances = Filter.useFilter(instances, filter);
        } catch (Exception x) {
            throw new AmbitException(x);
        }
    }
    // NOTE(review): pattern "hh" is the 12-hour clock; "HH" may be intended.
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyMMddhhmmss");
    Date timestamp = new Date(System.currentTimeMillis());
    String name = String.format("%s.%s.%s", simpleDateFormat.format(timestamp), UUID.randomUUID().toString(), weka.getClass().getName());
    ModelQueryResults m = new ModelQueryResults();
    m.setParameters(parameters);
    m.setId(null);
    m.setContentMediaType(AlgorithmFormat.WEKA.getMediaType());
    m.setName(name);
    m.setAlgorithm(alg_reporter.getURI(algorithm));
    AlgorithmURIReporter r = new AlgorithmURIReporter();
    LiteratureEntry entry = new LiteratureEntry(name, algorithm == null ? weka.getClass().getName() : r.getURI(applicationRootReference.toString(), algorithm));
    LiteratureEntry prediction = new LiteratureEntry(m.getName(), model_reporter.getURI(applicationRootReference.toString(), m));
    prediction.setType(_type.Model);
    Template predictors = null;
    Template dependent = null;
    PredictedVarsTemplate predicted = null;
    if (fclusterer != null) {
        try {
            fclusterer.buildClusterer(newInstances);
        } catch (Exception x) {
            throw new AmbitException(x);
        }
        // A clusterer predicts a single nominal "Cluster" property.
        predicted = new PredictedVarsTemplate(name + "#Predicted");
        Property property = new Property("Cluster", prediction);
        property.setNominal(true);
        predicted.add(property);
        dependent = new Template("Empty");
        predictors = new Template(name + "#Independent");
        for (int i = 0; i < newInstances.numAttributes(); i++) {
            property = createPropertyFromReference(new Reference(newInstances.attribute(i).name()), entry, referer);
            property.setOrder(i + 1);
            predictors.add(property);
        }
    } else if (fclassifier != null) {
        try {
            System.out.println(fclassifier.getClassifier().getCapabilities());
            fclassifier.getCapabilities().testWithFail(newInstances);
        } catch (Exception x) {
            throw new AmbitException(x);
        }
        try {
            // if (classifier instanceof LinearRegression) //don't do feature selection!
            // classifier.setOptions(new String[] {"-S","1"});
            // Cross-validation statistics: 10 folds, or 2 for tiny datasets.
            StringBuilder evaluationString = new StringBuilder();
            EvaluationStats<String> stats = new EvaluationStats<String>(EVType.crossvalidation, null);
            Evaluation eval = new Evaluation(newInstances);
            if (newInstances.numInstances() > 20) {
                eval.crossValidateModel(fclassifier, newInstances, 10, new Random(1));
                evaluationString.append("Crossvalidation 10 folds\n");
            } else {
                eval.crossValidateModel(fclassifier, newInstances, 2, new Random(1));
                evaluationString.append("Crossvalidation 2 folds\n");
            }
            // Each statistic below is best-effort: some only apply to
            // classification, some to regression, so failures are ignored.
            try {
                evaluationString.append(eval.toSummaryString());
                evaluationString.append("\n");
            } catch (Exception x) {
                // ignored: not available for this task type
            }
            try {
                evaluationString.append(eval.toClassDetailsString());
                evaluationString.append("\n");
                evaluationString.append(eval.toMatrixString());
                evaluationString.append("\n");
            } catch (Exception x) {
                // ignored: classification-only statistics
            }
            try {
                evaluationString.append(eval.weightedAreaUnderROC());
            } catch (Exception x) {
                // ignored: classification-only statistic
            }
            try {
                stats.setMAE(eval.meanAbsoluteError());
            } catch (Exception x) {
                // ignored
            }
            try {
                stats.setRMSE(eval.rootMeanSquaredError());
            } catch (Exception x) {
                // ignored
            }
            try {
                stats.setPctCorrect(eval.pctCorrect());
                stats.setPctInCorrect(eval.pctIncorrect());
            } catch (Exception x) {
                // ignored: classification-only statistics
            }
            stats.setContent(evaluationString.toString());
            m.addEvaluation(stats);
            // Training-set statistics on the final model.
            stats = new EvaluationStats<String>(EVType.evaluation_training, null);
            evaluationString = new StringBuilder();
            fclassifier.buildClassifier(newInstances);
            eval = new Evaluation(newInstances);
            eval.evaluateModel(fclassifier, newInstances);
            try {
                evaluationString.append("\nTraining dataset statistics\n");
                evaluationString.append(eval.toSummaryString());
                evaluationString.append("\n");
            } catch (Exception x) {
                // ignored
            }
            try {
                evaluationString.append(eval.toMatrixString());
                evaluationString.append("\n");
            } catch (Exception x) {
                // ignored: classification-only statistic
            }
            try {
                stats.setMAE(eval.meanAbsoluteError());
            } catch (Exception x) {
                // ignored
            }
            try {
                stats.setRMSE(eval.rootMeanSquaredError());
            } catch (Exception x) {
                // ignored
            }
            try {
                stats.setPctCorrect(eval.pctCorrect());
                stats.setPctInCorrect(eval.pctIncorrect());
            } catch (Exception x) {
                // ignored: classification-only statistics
            }
            stats.setContent(evaluationString.toString());
            m.addEvaluation(stats);
        } catch (Exception x) {
            throw new AmbitException(x);
        }
        dependent = new Template(name + "#Dependent");
        Property property = createPropertyFromReference(new Reference(newInstances.attribute(newInstances.classIndex()).name()), entry, referer);
        dependent.add(property);
        predicted = new PredictedVarsTemplate(name + "#Predicted");
        Property predictedProperty = new Property(property.getName(), prediction);
        predictedProperty.setLabel(property.getLabel());
        predictedProperty.setUnits(property.getUnits());
        predictedProperty.setClazz(property.getClazz());
        predictedProperty.setNominal(property.isNominal());
        predicted.add(predictedProperty);
        predictedProperty.setEnabled(true);
        // Classifiers that provide class distributions also get a confidence feature.
        if (supportsDistribution(fclassifier)) {
            Property confidenceProperty = new Property(String.format("%s Confidence", property.getName()), prediction);
            confidenceProperty.setLabel(Property.opentox_ConfidenceFeature);
            confidenceProperty.setUnits("");
            confidenceProperty.setClazz(Number.class);
            confidenceProperty.setEnabled(true);
            PropertyAnnotation<Property> a = new PropertyAnnotation<Property>();
            a.setType(OT.OTClass.ModelConfidenceFeature.name());
            a.setPredicate(OT.OTProperty.confidenceOf.name());
            a.setObject(predictedProperty);
            PropertyAnnotations aa = new PropertyAnnotations();
            aa.add(a);
            confidenceProperty.setAnnotations(aa);
            predicted.add(confidenceProperty);
        }
        predictors = new Template(name + "#Independent");
        for (int i = 0; i < newInstances.numAttributes(); i++) {
            if ("CompoundURI".equals(newInstances.attribute(i).name()))
                continue;
            if (newInstances.classIndex() == i)
                continue;
            property = createPropertyFromReference(new Reference(newInstances.attribute(i).name()), entry, referer);
            property.setOrder(i + 1);
            predictors.add(property);
        }
    } else if (pca != null) {
        try {
            pca.setVarianceCovered(1.0);
            pca.buildEvaluator(newInstances);
        } catch (Exception x) {
            throw new AmbitException(x);
        }
        Property property;
        dependent = new Template("Empty");
        predictors = new Template(name + "#Independent");
        for (int i = 0; i < newInstances.numAttributes(); i++) {
            if ("CompoundURI".equals(newInstances.attribute(i).name()))
                continue;
            if (newInstances.classIndex() == i)
                continue;
            property = createPropertyFromReference(new Reference(newInstances.attribute(i).name()), entry, referer);
            property.setOrder(i + 1);
            predictors.add(property);
        }
        // One predicted feature per principal component.
        predicted = new PredictedVarsTemplate(name + "#Predicted");
        for (int i = 0; i < newInstances.numAttributes(); i++) {
            if (newInstances.classIndex() == i)
                continue;
            property = createPropertyFromReference(new Reference(String.format("PCA_%d", i + 1)), entry, referer);
            property.setClazz(Number.class);
            property.setOrder(i + 1);
            predicted.add(property);
        }
    }
    m.setPredictors(predictors);
    m.setDependent(dependent);
    m.setPredicted(predicted);
    try {
        serializeModel(fclusterer == null ? fclassifier == null ? pca : fclassifier : fclusterer, newInstances, m);
    } catch (Exception x) {
        throw new AmbitException(x);
    }
    return m;
}
Also used : PropertyAnnotations(ambit2.base.data.PropertyAnnotations) LiteratureEntry(ambit2.base.data.LiteratureEntry) ArrayList(java.util.ArrayList) Classifier(weka.classifiers.Classifier) FilteredClassifier(weka.classifiers.meta.FilteredClassifier) Remove(weka.filters.unsupervised.attribute.Remove) RemoveWithValues(weka.filters.unsupervised.instance.RemoveWithValues) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) FilteredClusterer(weka.clusterers.FilteredClusterer) PredictedVarsTemplate(ambit2.base.data.PredictedVarsTemplate) Template(ambit2.base.data.Template) PropertyAnnotation(ambit2.base.data.PropertyAnnotation) Standardize(weka.filters.unsupervised.attribute.Standardize) Random(java.util.Random) Discretize(weka.filters.unsupervised.attribute.Discretize) ResourceException(org.restlet.resource.ResourceException) AlgorithmURIReporter(ambit2.rest.algorithm.AlgorithmURIReporter) Property(ambit2.base.data.Property) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues) EvaluationStats(ambit2.model.evaluation.EvaluationStats) PrincipalComponents(weka.attributeSelection.PrincipalComponents) PredictedVarsTemplate(ambit2.base.data.PredictedVarsTemplate) Evaluation(weka.classifiers.Evaluation) IEvaluation(ambit2.core.data.model.IEvaluation) WekaException(weka.core.WekaException) ModelQueryResults(ambit2.core.data.model.ModelQueryResults) Reference(org.restlet.data.Reference) MultiFilter(weka.filters.MultiFilter) AmbitException(net.idea.modbcum.i.exceptions.AmbitException) WekaException(weka.core.WekaException) ResourceException(org.restlet.resource.ResourceException) IOException(java.io.IOException) FilteredClassifier(weka.classifiers.meta.FilteredClassifier) Date(java.util.Date) Instances(weka.core.Instances) StringToNominal(weka.filters.unsupervised.attribute.StringToNominal) IBk(weka.classifiers.lazy.IBk) MultiFilter(weka.filters.MultiFilter) Filter(weka.filters.Filter) 
FilteredClusterer(weka.clusterers.FilteredClusterer) Clusterer(weka.clusterers.Clusterer) SimpleDateFormat(java.text.SimpleDateFormat) AmbitException(net.idea.modbcum.i.exceptions.AmbitException)

Example 4 with Standardize

use of weka.filters.unsupervised.attribute.Standardize in project umple by umple.

The class GaussianProcesses, method buildClassifier:

/**
 * Method for building the classifier.
 *
 * Preprocesses the data (missing-value replacement, nominal-to-binary
 * conversion, optional standardization/normalization), builds the kernel
 * matrix with the squared noise level added on the diagonal, inverts it
 * in place, and precomputes the target vector m_t used for prediction.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
@Override
public void buildClassifier(Instances insts) throws Exception {
    /* check the set of training instances */
    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);
        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        // Check whether all predictor attributes are numeric; skipped when
        // checks are turned off, in which case numeric-only data is assumed.
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }
        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }
    // Optional filter transforms the attributes AND the class
    // (setIgnoreClass(true)); the fitted filter is kept for prediction.
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        ((Standardize) m_Filter).setIgnoreClass(true);
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        ((Normalize) m_Filter).setIgnoreClass(true);
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }
    m_NumTrain = insts.numInstances();
    // Recover the affine transformation the filter applied to the class by
    // pushing two "witness" class values (0 and 1) through the filter:
    // m_Blin is the image of 0 and m_Alin the slope (image of 1 minus
    // image of 0), so predictions can be mapped back to original units.
    if (m_Filter != null) {
        Instance witness = (Instance) insts.instance(0).copy();
        witness.setValue(insts.classIndex(), 0);
        m_Filter.input(witness);
        m_Filter.batchFinished();
        Instance res = m_Filter.output();
        m_Blin = res.value(insts.classIndex());
        witness.setValue(insts.classIndex(), 1);
        m_Filter.input(witness);
        m_Filter.batchFinished();
        res = m_Filter.output();
        m_Alin = res.value(insts.classIndex()) - m_Blin;
    } else {
        // no filter: identity transform
        m_Alin = 1.0;
        m_Blin = 0.0;
    }
    // Initialize kernel
    try {
        // Disable the kernel cache while the full matrix is built below,
        // presumably to save memory -- TODO confirm cache semantics.
        CachedKernel cachedKernel = (CachedKernel) m_kernel;
        cachedKernel.setCacheSize(0);
    } catch (Exception e) {
    // ignore -- kernel is not a CachedKernel
    }
    m_kernel.buildKernel(insts);
    // Compute average target value
    double sum = 0.0;
    for (int i = 0; i < insts.numInstances(); i++) {
        sum += insts.instance(i).classValue();
    }
    m_avg_target = sum / insts.numInstances();
    // Store squared noise level
    m_deltaSquared = m_delta * m_delta;
    // Initialize kernel matrix / covariance matrix in lower-triangular
    // storage (row i holds i+1 entries); noise is added on the diagonal.
    int n = insts.numInstances();
    m_L = new double[n][];
    double kv = 0;
    for (int i = 0; i < n; i++) {
        m_L[i] = new double[i + 1];
        for (int j = 0; j < i; j++) {
            kv = m_kernel.eval(i, j, insts.instance(i));
            m_L[i][j] = kv;
        }
        kv = m_kernel.eval(i, i, insts.instance(i));
        m_L[i][i] = kv + m_deltaSquared;
    }
    // Rebuild a fresh kernel copy with cache size -1 (presumably a full
    // cache) for use at prediction time -- TODO confirm semantics.
    if (m_kernel instanceof CachedKernel) {
        m_kernel = Kernel.makeCopy(m_kernel);
        ((CachedKernel) m_kernel).setCacheSize(-1);
        m_kernel.buildKernel(insts);
    }
    // Calculate inverse matrix exploiting symmetry of covariance matrix
    // NB this replaces the kernel matrix with (the negative of) its inverse and
    // does
    // not require any extra memory for a solution matrix
    double[] tmprow = new double[n];
    double tmp2 = 0, tmp = 0;
    for (int i = 0; i < n; i++) {
        // Pivot on element (i, i); only the lower triangle is stored, so
        // entries (i, j) with j > i are accessed as m_L[j][i].
        tmp = -m_L[i][i];
        m_L[i][i] = 1.0 / tmp;
        for (int j = 0; j < n; j++) {
            if (j != i) {
                if (j < i) {
                    tmprow[j] = m_L[i][j];
                    m_L[i][j] /= tmp;
                    tmp2 = m_L[i][j];
                    m_L[j][j] += tmp2 * tmp2 * tmp;
                } else if (j > i) {
                    tmprow[j] = m_L[j][i];
                    m_L[j][i] /= tmp;
                    tmp2 = m_L[j][i];
                    m_L[j][j] += tmp2 * tmp2 * tmp;
                }
            }
        }
        // Update the remaining off-diagonal entries with the pivot row.
        for (int j = 0; j < n; j++) {
            if (j != i) {
                if (i < j) {
                    for (int k = 0; k < i; k++) {
                        m_L[j][k] += tmprow[j] * m_L[i][k];
                    }
                } else {
                    for (int k = 0; k < j; k++) {
                        m_L[j][k] += tmprow[j] * m_L[i][k];
                    }
                }
                for (int k = i + 1; k < j; k++) {
                    m_L[j][k] += tmprow[j] * m_L[k][i];
                }
            }
        }
    }
    m_t = new Matrix(insts.numInstances(), 1);
    // Centered targets: tt = y - mean(y).
    double[] tt = new double[n];
    for (int i = 0; i < n; i++) {
        tt[i] = insts.instance(i).classValue() - m_avg_target;
    }
    // calculate m_t = tt . m_L
    // Since m_L now holds the NEGATED inverse, the sign flips below give
    // m_t = C^{-1} (y - mean); symmetry supplies the entries above the
    // stored diagonal (m_L[k][i] for k >= i).
    for (int i = 0; i < n; i++) {
        double s = 0;
        for (int k = 0; k < i; k++) {
            s -= m_L[i][k] * tt[k];
        }
        for (int k = i; k < n; k++) {
            s -= m_L[k][i] * tt[k];
        }
        m_t.set(i, 0, s);
    }
}
Also used : CachedKernel(weka.classifiers.functions.supportVector.CachedKernel) Normalize(weka.filters.unsupervised.attribute.Normalize) Instance(weka.core.Instance) NominalToBinary(weka.filters.unsupervised.attribute.NominalToBinary) Instances(weka.core.Instances) Standardize(weka.filters.unsupervised.attribute.Standardize) Matrix(weka.core.matrix.Matrix) ReplaceMissingValues(weka.filters.unsupervised.attribute.ReplaceMissingValues)

Example 5 with Standardize

use of weka.filters.unsupervised.attribute.Standardize in project umple by umple.

The class PrincipalComponents, method fillCorrelation:

/**
 * Fill the correlation matrix of the training data and record each
 * attribute's standard deviation, then standardize the training data
 * in place (the fitted filter is kept in m_standardizeFilter).
 *
 * The matrix is symmetric, so only the upper triangle is computed and
 * mirrored; the previous version iterated the full matrix and computed
 * every off-diagonal correlation twice.
 *
 * @throws Exception if the standardize filter cannot be applied
 */
private void fillCorrelation() throws Exception {
    m_correlation = new double[m_numAttribs][m_numAttribs];
    double[] att1 = new double[m_numInstances];
    double[] att2 = new double[m_numInstances];
    double corr;
    for (int i = 0; i < m_numAttribs; i++) {
        // j starts at i: entries below the diagonal are filled by symmetry.
        for (int j = i; j < m_numAttribs; j++) {
            for (int k = 0; k < m_numInstances; k++) {
                att1[k] = m_trainInstances.instance(k).value(i);
                att2[k] = m_trainInstances.instance(k).value(j);
            }
            if (i == j) {
                m_correlation[i][j] = 1.0;
                // store the standard deviation
                m_stdDevs[i] = Math.sqrt(Utils.variance(att1));
            } else {
                corr = Utils.correlation(att1, att2, m_numInstances);
                m_correlation[i][j] = corr;
                m_correlation[j][i] = corr;
            }
        }
    }
    // now standardize the input data
    m_standardizeFilter = new Standardize();
    m_standardizeFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_standardizeFilter);
}
Also used : Standardize(weka.filters.unsupervised.attribute.Standardize)

Aggregations

Standardize (weka.filters.unsupervised.attribute.Standardize)5 Instances (weka.core.Instances)4 NominalToBinary (weka.filters.unsupervised.attribute.NominalToBinary)4 ReplaceMissingValues (weka.filters.unsupervised.attribute.ReplaceMissingValues)4 Normalize (weka.filters.unsupervised.attribute.Normalize)3 Random (java.util.Random)2 Instance (weka.core.Instance)2 LiteratureEntry (ambit2.base.data.LiteratureEntry)1 PredictedVarsTemplate (ambit2.base.data.PredictedVarsTemplate)1 Property (ambit2.base.data.Property)1 PropertyAnnotation (ambit2.base.data.PropertyAnnotation)1 PropertyAnnotations (ambit2.base.data.PropertyAnnotations)1 Template (ambit2.base.data.Template)1 IEvaluation (ambit2.core.data.model.IEvaluation)1 ModelQueryResults (ambit2.core.data.model.ModelQueryResults)1 EvaluationStats (ambit2.model.evaluation.EvaluationStats)1 AlgorithmURIReporter (ambit2.rest.algorithm.AlgorithmURIReporter)1 IOException (java.io.IOException)1 SimpleDateFormat (java.text.SimpleDateFormat)1 ArrayList (java.util.ArrayList)1