Search in sources :

Example 1 with ArffSaver

use of weka.core.converters.ArffSaver in project 490 by pauleibye.

the class MainFrame method createCSVandArff.

/**
 * Dumps the extracted file features to {@code csv.txt} and converts that CSV
 * into the ARFF file {@code data.arff}.
 *
 * <p>Each step is skipped when its target file already exists, so an existing
 * {@code csv.txt} or {@code data.arff} is reused as-is and never overwritten.
 * I/O failures are reported on standard output / standard error and are not
 * propagated to the caller.
 *
 * @param FE
 *            the feature extractor whose {@code getFileFeatures()} entries are
 *            written one per line into the CSV file
 */
public void createCSVandArff(FeatureExtractor FE) {
    try {
        File file = new File("csv.txt");
        if (!file.exists()) {
            // try-with-resources closes (and flushes) the writer even when
            // getFileFeatures() or a write fails half-way through.
            try (PrintWriter writer = new PrintWriter(file)) {
                for (String s : FE.getFileFeatures()) {
                    writer.println(s);
                }
                // PrintWriter never throws on write errors; it only records them.
                if (writer.checkError()) {
                    throw new IOException("Failed to write feature rows to " + file);
                }
            }
        }
        File arff = new File("data.arff");
        if (!arff.exists()) {
            CSVLoader loader = new CSVLoader();
            loader.setSource(file);
            Instances data = loader.getDataSet();
            ArffSaver saver = new ArffSaver();
            saver.setInstances(data);
            saver.setFile(arff);
            saver.writeBatch();
        }
    } catch (IOException e) {
        System.out.println("Exception Occurred:");
        e.printStackTrace();
    }
}
Also used : Instances(weka.core.Instances) CSVLoader(weka.core.converters.CSVLoader) ArffSaver(weka.core.converters.ArffSaver) IOException(java.io.IOException) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 2 with ArffSaver

use of weka.core.converters.ArffSaver in project dkpro-tc by dkpro.

the class WekaUtils method instanceListToArffFile.

/**
 * Writes a list of instances to an ARFF file using an incremental
 * {@link ArffSaver}. Single-label case.
 *
 * <p>The outcome attribute is appended to the attribute store and set as the
 * class attribute. If a feature already uses the class attribute name, the
 * outcome attribute is renamed with the class attribute prefix. Compressed
 * output is requested on the saver.
 *
 * @param outputFile
 *            the output file; missing parent directories are created
 * @param instanceList
 *            the instance list
 * @param useDenseInstances
 *            if {@code true} each row is written as a {@link DenseInstance},
 *            otherwise as a {@link SparseInstance}
 * @param isRegressionExperiment
 *            if {@code true} each outcome is parsed as a double, otherwise it
 *            is set as a label value
 * @param useWeights
 *            if {@code true} the weight of each instance is copied to the
 *            written row
 * @throws IllegalArgumentException
 *             if the instance list yields no outcomes
 * @throws Exception
 *             in case of error
 */
public static void instanceListToArffFile(File outputFile, List<Instance> instanceList, boolean useDenseInstances, boolean isRegressionExperiment, boolean useWeights) throws Exception {
    List<String> outcomeList = new ArrayList<>();
    for (Instance i : instanceList) {
        outcomeList.add(i.getOutcome());
    }
    // check for error conditions
    if (outcomeList.isEmpty()) {
        throw new IllegalArgumentException("List of instance outcomes is empty.");
    }
    // Filter preprocessingFilter = new ReplaceMissingValuesWithZeroFilter();
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instanceList);
    // Make sure "outcome" is not the name of an attribute
    Attribute outcomeAttribute = createOutcomeAttribute(outcomeList, isRegressionExperiment);
    if (attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME)) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);
    Instances wekaInstances = new Instances(RELATION_NAME, attributeStore.getAttributes(), instanceList.size());
    wekaInstances.setClass(outcomeAttribute);
    if (!outputFile.exists()) {
        // Create only the missing parent directories. Calling mkdirs() on the
        // target itself would create a directory named like the output file.
        File parentDir = outputFile.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        outputFile.createNewFile();
    }
    ArffSaver saver = new ArffSaver();
    // preprocessingFilter.setInputFormat(wekaInstances);
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);
    for (int i = 0; i < instanceList.size(); i++) {
        Instance instance = instanceList.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);
        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        // The row needs the dataset header before its class value can be set.
        wekaInstance.setDataset(wekaInstances);
        String outcome = outcomeList.get(i);
        if (isRegressionExperiment) {
            wekaInstance.setClassValue(Double.parseDouble(outcome));
        } else {
            wekaInstance.setClassValue(outcome);
        }
        if (useWeights) {
            wekaInstance.setWeight(instance.getWeight());
        }
        // preprocessingFilter.input(wekaInstance);
        // saver.writeIncremental(preprocessingFilter.output());
        saver.writeIncremental(wekaInstance);
    }
    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) ArffSaver(weka.core.converters.ArffSaver) Instances(weka.core.Instances) MultiLabelInstances(mulan.data.MultiLabelInstances)

Example 3 with ArffSaver

use of weka.core.converters.ArffSaver in project dkpro-tc by dkpro.

the class WekaUtils method instanceListToArffFileMultiLabel.

/**
 * Writes a list of instances to an ARFF file using an incremental
 * {@link ArffSaver}. Multi-label case.
 *
 * <p>One attribute per label is inserted at the beginning of the attribute
 * store, and the number of label attributes is encoded in the relation name
 * ({@code -C <n>}). For every instance each label attribute is set to
 * {@code 1.0} when the instance's outcomes contain that label and to
 * {@code 0.0} otherwise. Compressed output is requested on the saver.
 *
 * @param outputFile
 *            the output file; missing parent directories are created
 * @param instances
 *            the instances to convert
 * @param useDenseInstances
 *            if {@code true} each row is written as a {@link DenseInstance},
 *            otherwise as a {@link SparseInstance}
 * @param useWeights
 *            if {@code true} the weight of each instance is copied to the
 *            written row
 * @throws Exception
 *             in case of errors
 */
public static void instanceListToArffFileMultiLabel(File outputFile, List<Instance> instances, boolean useDenseInstances, boolean useWeights) throws Exception {
    // Filter preprocessingFilter = new ReplaceMissingValuesWithZeroFilter();
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instances);
    List<String> outcomes = new ArrayList<>();
    for (Instance i : instances) {
        outcomes.add(i.getOutcome());
    }
    List<Attribute> outcomeAttributes = createOutcomeAttributes(new ArrayList<String>(outcomes));
    // in Meka, class label attributes have to go on top
    for (Attribute attribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(attribute.name(), attribute);
    }
    // for Meka-internal use
    Instances wekaInstances = new Instances(RELATION_NAME + ": -C " + outcomeAttributes.size() + " ", attributeStore.getAttributes(), instances.size());
    wekaInstances.setClassIndex(outcomeAttributes.size());
    if (!outputFile.exists()) {
        // Create only the missing parent directories. Calling mkdirs() on the
        // target itself would create a directory named like the output file.
        File parentDir = outputFile.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        outputFile.createNewFile();
    }
    ArffSaver saver = new ArffSaver();
    // preprocessingFilter.setInputFormat(wekaInstances);
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);
    for (int i = 0; i < instances.size(); i++) {
        Instance instance = instances.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);
        // set class label values
        List<String> instanceOutcome = instance.getOutcomes();
        for (Attribute label : outcomeAttributes) {
            String labelname = label.name();
            // The label attribute name carries the class prefix; the part after
            // the prefix is what is looked up in the instance's outcomes.
            featureValues[attributeStore.getAttributeOffset(labelname)] = instanceOutcome.contains(labelname.split(CLASS_ATTRIBUTE_PREFIX)[1]) ? 1.0d : 0.0d;
        }
        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        wekaInstance.setDataset(wekaInstances);
        if (useWeights) {
            wekaInstance.setWeight(instance.getWeight());
        }
        // preprocessingFilter.input(wekaInstance);
        // saver.writeIncremental(preprocessingFilter.output());
        saver.writeIncremental(wekaInstance);
    }
    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) ArffSaver(weka.core.converters.ArffSaver) Instances(weka.core.Instances) MultiLabelInstances(mulan.data.MultiLabelInstances)

Example 4 with ArffSaver

use of weka.core.converters.ArffSaver in project dkpro-tc by dkpro.

the class MekaDataWriter method initalConfiguration.

/**
 * Sets up the incremental ARFF saver and the dataset header on first use.
 *
 * <p>When the saver has already been created, the previously built header is
 * returned unchanged. Otherwise the attribute store is filled from the
 * tab-separated feature description file in the output folder (feature name,
 * feature type and, for nominal features, a third column), the label
 * attributes are inserted at the beginning, and the resulting header is
 * handed to the saver.
 *
 * @param instances
 *            the instances; only their count is used, as the initial capacity
 *            of the header
 * @return the dataset header shared by all written instances
 * @throws TextClassificationException
 *             declared by the signature
 * @throws IOException
 *             if the saver target or the feature description cannot be accessed
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // Already configured by an earlier call: reuse the existing header.
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File featureDescription = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    for (String line : FileUtils.readLines(featureDescription, "utf-8")) {
        String[] columns = line.split("\t");
        String featureName = columns[0];
        // Only the first description of a feature name is registered.
        if (attributeStore.containsAttributeName(featureName)) {
            continue;
        }
        FeatureType type = FeatureType.valueOf(columns[1]);
        // The third column is only read for nominal features.
        String enumType = (type == FeatureType.NOMINAL) ? columns[2] : null;
        Attribute featureAttribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(featureName, type, enumType);
        attributeStore.addAttribute(featureName, featureAttribute);
    }

    List<String> labelNames = Arrays.asList(outcomes);
    outcomeAttributes = createOutcomeAttributes(labelNames);
    // in Meka, class label attributes have to go on top
    for (Attribute labelAttribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(labelAttribute.name(), labelAttribute);
    }

    // for Meka-internal use: the relation name carries the number of labels
    String relationName = WekaUtils.RELATION_NAME + ": -C " + outcomeAttributes.size() + " ";
    masterInstance = new Instances(relationName, attributeStore.getAttributes(), instances.size());
    masterInstance.setClassIndex(outcomeAttributes.size());
    saver.setInstances(masterInstance);
    return masterInstance;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instances(weka.core.Instances) FeatureType(org.dkpro.tc.api.features.FeatureType) Attribute(weka.core.Attribute) ArffSaver(weka.core.converters.ArffSaver) File(java.io.File)

Example 5 with ArffSaver

use of weka.core.converters.ArffSaver in project dkpro-tc by dkpro.

the class WekaDataWriter method initalConfiguration.

/**
 * Sets up the incremental ARFF saver and the dataset header on first use.
 *
 * <p>When the saver has already been created, the previously built header is
 * returned unchanged. Otherwise the attribute store is filled from the
 * tab-separated feature description file in the output folder (feature name,
 * feature type and, for nominal features, a third column), the outcome
 * attribute is appended and set as the class attribute, and the resulting
 * header is handed to the saver.
 *
 * @param instances
 *            the instances; only their count is used, as the initial capacity
 *            of the header
 * @return the dataset header shared by all written instances
 * @throws TextClassificationException
 *             declared by the signature
 * @throws IOException
 *             if the saver target or the feature description cannot be accessed
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // Already configured by an earlier call: reuse the existing header.
    if (saver != null) {
        return masterInstance;
    }

    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);

    attributeStore = new AttributeStore();
    File featureDescription = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    for (String line : FileUtils.readLines(featureDescription, "utf-8")) {
        String[] columns = line.split("\t");
        String featureName = columns[0];
        // Only the first description of a feature name is registered.
        if (attributeStore.containsAttributeName(featureName)) {
            continue;
        }
        FeatureType type = FeatureType.valueOf(columns[1]);
        // The third column is only read for nominal features.
        String enumType = (type == FeatureType.NOMINAL) ? columns[2] : null;
        Attribute featureAttribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(featureName, type, enumType);
        attributeStore.addAttribute(featureName, featureAttribute);
    }

    List<String> outcomeNames = Arrays.asList(outcomes);
    outcomeAttribute = createOutcomeAttribute(outcomeNames, isRegression);
    // A feature may already occupy the class attribute name; if so, the
    // outcome attribute gets a prefixed name instead.
    if (attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME)) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);

    masterInstance = new Instances(WekaUtils.RELATION_NAME, attributeStore.getAttributes(), instances.size());
    masterInstance.setClass(outcomeAttribute);
    saver.setInstances(masterInstance);
    return masterInstance;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instances(weka.core.Instances) FeatureType(org.dkpro.tc.api.features.FeatureType) Attribute(weka.core.Attribute) ArffSaver(weka.core.converters.ArffSaver) File(java.io.File)

Aggregations

ArffSaver (weka.core.converters.ArffSaver)6 Instances (weka.core.Instances)5 File (java.io.File)4 Attribute (weka.core.Attribute)4 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 MultiLabelInstances (mulan.data.MultiLabelInstances)2 FeatureType (org.dkpro.tc.api.features.FeatureType)2 Instance (org.dkpro.tc.api.features.Instance)2 AttributeStore (org.dkpro.tc.ml.weka.util.AttributeStore)2 DenseInstance (weka.core.DenseInstance)2 SparseInstance (weka.core.SparseInstance)2 PrintWriter (java.io.PrintWriter)1 CSVLoader (weka.core.converters.CSVLoader)1