Search in sources :

Example 1 with DenseInstance

use of weka.core.DenseInstance in project dkpro-tc by dkpro.

the class WekaUtils method instanceListToArffFile.

/**
 * Writes a list of single-label instances to an ARFF file.
 * <p>
 * An attribute store is built from the instances, the outcome (class) attribute is added to
 * it, and every instance is converted to a Weka instance and written incrementally through an
 * {@link ArffSaver} with output compression switched on.
 *
 * @param outputFile
 *            the output file; missing parent directories are created
 * @param instanceList
 *            the instance list; must not be empty
 * @param useDenseInstances
 *            if true a {@link DenseInstance} is written per instance, otherwise a
 *            {@link SparseInstance}
 * @param isRegressionExperiment
 *            if true each outcome is parsed as a double and set as numeric class value,
 *            otherwise the outcome string itself is set as class value
 * @param useWeights
 *            if true the weight of each instance is copied onto the written Weka instance
 * @throws IllegalArgumentException
 *             if the instance list yields no outcomes
 * @throws Exception
 *             in case of error
 */
public static void instanceListToArffFile(File outputFile, List<Instance> instanceList, boolean useDenseInstances, boolean isRegressionExperiment, boolean useWeights) throws Exception {
    List<String> outcomeList = new ArrayList<>();
    for (Instance i : instanceList) {
        outcomeList.add(i.getOutcome());
    }
    // check for error conditions
    if (outcomeList.isEmpty()) {
        throw new IllegalArgumentException("List of instance outcomes is empty.");
    }
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instanceList);
    // Make sure the outcome attribute does not clash with a feature of the same name
    Attribute outcomeAttribute = createOutcomeAttribute(outcomeList, isRegressionExperiment);
    if (attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME)) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);
    Instances wekaInstances = new Instances(RELATION_NAME, attributeStore.getAttributes(), instanceList.size());
    wekaInstances.setClass(outcomeAttribute);
    if (!outputFile.exists()) {
        // Create the missing parent directories only. Calling mkdirs() on outputFile itself
        // would create a directory at the file's own path.
        File parentDir = outputFile.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        outputFile.createNewFile();
    }
    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);
    for (int i = 0; i < instanceList.size(); i++) {
        Instance instance = instanceList.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);
        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        // the dataset must be attached before the class value can be set
        wekaInstance.setDataset(wekaInstances);
        String outcome = outcomeList.get(i);
        if (isRegressionExperiment) {
            wekaInstance.setClassValue(Double.parseDouble(outcome));
        } else {
            wekaInstance.setClassValue(outcome);
        }
        if (useWeights) {
            wekaInstance.setWeight(instance.getWeight());
        }
        saver.writeIncremental(wekaInstance);
    }
    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) ArffSaver(weka.core.converters.ArffSaver) Instances(weka.core.Instances) MultiLabelInstances(mulan.data.MultiLabelInstances)

Example 2 with DenseInstance

use of weka.core.DenseInstance in project dkpro-tc by dkpro.

the class ReplaceMissingValuesWithZeroFilter method convertInstance.

/**
 * Converts one instance and pushes the result onto the output queue.
 * <p>
 * A missing value is replaced when it belongs to a nominal or numeric attribute that is not
 * the class attribute of the input format. For a sparse instance such an entry is left out of
 * the stored values; for any other instance 0.0 is stored in its place. All remaining values
 * are copied unchanged.
 *
 * @param instance
 *            the instance to convert
 */
private void convertInstance(Instance instance) {
    final Instance converted;
    if (instance instanceof SparseInstance) {
        final int storedCount = instance.numValues();
        double[] keptValues = new double[storedCount];
        int[] keptIndices = new int[storedCount];
        int kept = 0;
        for (int pos = 0; pos < storedCount; pos++) {
            final boolean dropEntry = instance.isMissingSparse(pos)
                    && (getInputFormat().classIndex() != instance.index(pos))
                    && (instance.attributeSparse(pos).isNominal() || instance.attributeSparse(pos).isNumeric());
            if (dropEntry) {
                // leaving the entry out of the stored values is the replacement itself
                continue;
            }
            keptValues[kept] = instance.valueSparse(pos);
            keptIndices[kept] = instance.index(pos);
            kept++;
        }
        if (kept != storedCount) {
            // at least one entry was dropped: trim both arrays to the entries actually kept
            final double[] trimmedValues = new double[kept];
            final int[] trimmedIndices = new int[kept];
            System.arraycopy(keptValues, 0, trimmedValues, 0, kept);
            System.arraycopy(keptIndices, 0, trimmedIndices, 0, kept);
            keptValues = trimmedValues;
            keptIndices = trimmedIndices;
        }
        converted = new SparseInstance(instance.weight(), keptValues, keptIndices, instance.numAttributes());
    } else {
        final double[] denseValues = new double[getInputFormat().numAttributes()];
        for (int att = 0; att < instance.numAttributes(); att++) {
            final boolean replaceWithZero = instance.isMissing(att)
                    && (getInputFormat().classIndex() != att)
                    && (getInputFormat().attribute(att).isNominal() || getInputFormat().attribute(att).isNumeric());
            denseValues[att] = replaceWithZero ? 0.0d : instance.value(att);
        }
        converted = new DenseInstance(instance.weight(), denseValues);
    }
    converted.setDataset(instance.dataset());
    push(converted);
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(weka.core.Instance) DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance)

Example 3 with DenseInstance

use of weka.core.DenseInstance in project TrakEM2 by trakem2.

the class LineageClassifier method classify.

/**
 * Classifies a feature vector with the {@code Operator} registered for the calling thread.
 * <p>
 * The operator is looked up in {@code table} by the current thread and created and registered
 * on first use; lookup and registration happen while holding the lock on {@code table}.
 *
 * @param vector
 *            the attribute values of the instance to classify
 * @return true if the classifier's prediction, rounded to the nearest integer, equals 1
 * @throws Exception
 *             if the classification fails
 */
public static final boolean classify(final double[] vector) throws Exception {
    final Thread current = Thread.currentThread();
    // One Operator per thread, to avoid clashes within weka
    final Operator op;
    synchronized (table) {
        final Operator cached = table.get(current);
        if (cached != null) {
            op = cached;
        } else {
            op = new Operator();
            table.put(current, op);
        }
    }
    // Wrap the vector in an instance of weight 1 that is bound to the operator's dataset
    final Instance candidate = new DenseInstance(1, vector);
    candidate.setDataset(op.data);
    // Was trained to return true or false, represented in weka as 0 or 1
    final int predicted = (int) Math.round(op.c.classifyInstance(candidate));
    return predicted == 1;
}
Also used : DenseInstance(weka.core.DenseInstance) Instance(weka.core.Instance) DenseInstance(weka.core.DenseInstance)

Example 4 with DenseInstance

use of weka.core.DenseInstance in project cia by Hack23.

the class WordCounterImpl method calculateWordCount.

/**
 * Counts the words of a document's content.
 * <p>
 * The content is placed into a one-row dataset and run through a {@link StringToWordVector}
 * filter that tokenizes into single words, lower-cases tokens, outputs word counts and treats
 * every word shorter than five characters as a stop word.
 *
 * @param documentContentData
 *            the document whose content is counted
 * @param maxResult
 *            passed to the filter as the number of words to keep
 * @return a map from attribute name of the filtered data to its value for the document; if
 *         filtering fails, a warning is logged and the entries collected so far (possibly
 *         none) are returned
 */
@Override
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData, final int maxResult) {
    final String html = documentContentData.getContent();

    // One-row dataset holding the content in its single attribute
    final ArrayList<Attribute> attributes = new ArrayList<>();
    attributes.add(new Attribute(HTML, (ArrayList<String>) null));
    final Instances dataset = new Instances(HTML, attributes, 1);
    dataset.add(new DenseInstance(1));
    dataset.instance(0).setValue(0, html);

    // Single-word tokens only
    final NGramTokenizer unigramTokenizer = new NGramTokenizer();
    unigramTokenizer.setNGramMinSize(1);
    unigramTokenizer.setNGramMaxSize(1);
    unigramTokenizer.setDelimiters(TOKEN_DELIMITERS);

    final StopwordsHandler shortWordHandler = new StopwordsHandler() {

        @Override
        public boolean isStopword(final String word) {
            return word.length() < 5;
        }
    };

    final StringToWordVector wordVectorFilter = new StringToWordVector();
    wordVectorFilter.setTokenizer(unigramTokenizer);
    wordVectorFilter.setStopwordsHandler(shortWordHandler);
    wordVectorFilter.setLowerCaseTokens(true);
    wordVectorFilter.setOutputWordCounts(true);
    wordVectorFilter.setWordsToKeep(maxResult);

    final Map<String, Integer> wordCounts = new HashMap<>();
    try {
        wordVectorFilter.setInputFormat(dataset);
        final Instance row = Filter.useFilter(dataset, wordVectorFilter).lastInstance();
        final int attributeCount = row.numAttributes();
        for (int index = 0; index < attributeCount; index++) {
            final String word = row.attribute(index).name();
            final Integer count = Integer.valueOf(row.toString(index));
            wordCounts.put(word, count);
        }
    } catch (final Exception e) {
        LOGGER.warn("Problem calculating wordcount for : {} , exception:{}", documentContentData.getId(), e);
    }
    return wordCounts;
}
Also used : DenseInstance(weka.core.DenseInstance) Attribute(weka.core.Attribute) HashMap(java.util.HashMap) Instance(weka.core.Instance) DenseInstance(weka.core.DenseInstance) ArrayList(java.util.ArrayList) StopwordsHandler(weka.core.stopwords.StopwordsHandler) NGramTokenizer(weka.core.tokenizers.NGramTokenizer) Instances(weka.core.Instances) StringToWordVector(weka.filters.unsupervised.attribute.StringToWordVector)

Example 5 with DenseInstance

use of weka.core.DenseInstance in project dkpro-tc by dkpro.

the class WekaUtils method instanceListToArffFileMultiLabel.

/**
 * Writes a list of multi-label instances to an ARFF file.
 * <p>
 * One attribute per label is placed in front of the feature attributes, the number of label
 * attributes is encoded into the relation name as a {@code -C} option, and every instance is
 * converted to a Weka instance and written incrementally through an {@link ArffSaver} with
 * output compression switched on. A label attribute is 1.0 for an instance whose outcomes
 * contain the label and 0.0 otherwise.
 *
 * @param outputFile
 *            the output file; missing parent directories are created
 * @param instances
 *            the instances to convert
 * @param useDenseInstances
 *            if true a {@link DenseInstance} is written per instance, otherwise a
 *            {@link SparseInstance}
 * @param useWeights
 *            if true the weight of each instance is copied onto the written Weka instance
 * @throws Exception
 *             in case of errors
 */
public static void instanceListToArffFileMultiLabel(File outputFile, List<Instance> instances, boolean useDenseInstances, boolean useWeights) throws Exception {
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instances);
    // NOTE(review): only getOutcome() is collected here, while the per-instance labels below
    // are read from getOutcomes() - confirm that this covers every label that can occur.
    List<String> outcomes = new ArrayList<>();
    for (Instance i : instances) {
        outcomes.add(i.getOutcome());
    }
    List<Attribute> outcomeAttributes = createOutcomeAttributes(new ArrayList<String>(outcomes));
    // in Meka, class label attributes have to go on top
    for (Attribute attribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(attribute.name(), attribute);
    }
    // for Meka-internal use
    Instances wekaInstances = new Instances(RELATION_NAME + ": -C " + outcomeAttributes.size() + " ", attributeStore.getAttributes(), instances.size());
    wekaInstances.setClassIndex(outcomeAttributes.size());
    if (!outputFile.exists()) {
        // Create the missing parent directories only. Calling mkdirs() on outputFile itself
        // would create a directory at the file's own path.
        File parentDir = outputFile.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        outputFile.createNewFile();
    }
    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);
    for (int i = 0; i < instances.size(); i++) {
        Instance instance = instances.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);
        // set class label values
        List<String> instanceOutcome = instance.getOutcomes();
        for (Attribute label : outcomeAttributes) {
            String labelname = label.name();
            // the label is the part of the attribute name after the prefix; note that split()
            // interprets the prefix as a regular expression
            featureValues[attributeStore.getAttributeOffset(labelname)] = instanceOutcome.contains(labelname.split(CLASS_ATTRIBUTE_PREFIX)[1]) ? 1.0d : 0.0d;
        }
        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        wekaInstance.setDataset(wekaInstances);
        if (useWeights) {
            wekaInstance.setWeight(instance.getWeight());
        }
        saver.writeIncremental(wekaInstance);
    }
    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) ArffSaver(weka.core.converters.ArffSaver) Instances(weka.core.Instances) MultiLabelInstances(mulan.data.MultiLabelInstances)

Aggregations

DenseInstance (weka.core.DenseInstance): 10, Instances (weka.core.Instances): 7, Attribute (weka.core.Attribute): 5, SparseInstance (weka.core.SparseInstance): 5, ArrayList (java.util.ArrayList): 4, Instance (org.dkpro.tc.api.features.Instance): 4, Instance (weka.core.Instance): 4, IOException (java.io.IOException): 3, MultiLabelInstances (mulan.data.MultiLabelInstances): 2, AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 2, TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 2, ArffSaver (weka.core.converters.ArffSaver): 2, Font (java.awt.Font): 1, GridBagConstraints (java.awt.GridBagConstraints): 1, GridBagLayout (java.awt.GridBagLayout): 1, Insets (java.awt.Insets): 1, ActionEvent (java.awt.event.ActionEvent): 1, ActionListener (java.awt.event.ActionListener): 1, File (java.io.File): 1, FileInputStream (java.io.FileInputStream): 1