Use of weka.core.converters.ArffSaver in project 490 by pauleibye.
From the class MainFrame, method createCSVandArff.
// Writes the extracted features to a CSV file, then converts that CSV to an ARFF file.
// Both files are only (re)generated if they do not already exist on disk.
public void createCSVandArff(FeatureExtractor FE) {
    try {
        File file = new File("csv.txt");
        if (!file.exists()) {
            // try-with-resources guarantees the writer is closed even if println fails;
            // PrintWriter creates the file itself, so no separate createNewFile() is needed.
            try (PrintWriter writer = new PrintWriter(file)) {
                for (String s : FE.getFileFeatures()) {
                    writer.println(s);
                }
            }
        }
        File arff = new File("data.arff");
        if (!arff.exists()) {
            // Load the CSV (freshly written or pre-existing) and re-save it as ARFF.
            CSVLoader loader = new CSVLoader();
            loader.setSource(file);
            Instances data = loader.getDataSet();
            ArffSaver saver = new ArffSaver();
            saver.setInstances(data);
            saver.setFile(arff);
            saver.writeBatch();
        }
    } catch (IOException e) {
        System.out.println("Exception Occurred:");
        e.printStackTrace();
    }
}
Use of weka.core.converters.ArffSaver in project dkpro-tc by dkpro.
From the class WekaUtils, method instanceListToArffFile.
/**
 * Converts a feature store to a list of instances and writes them incrementally to an
 * ARFF file. Single-label case.
 *
 * @param outputFile
 *            the output file
 * @param instanceList
 *            the instance list
 * @param useDenseInstances
 *            use dense instances
 * @param isRegressionExperiment
 *            is regression
 * @param useWeights
 *            uses weight
 * @throws Exception
 *             in case of error
 */
public static void instanceListToArffFile(File outputFile, List<Instance> instanceList, boolean useDenseInstances, boolean isRegressionExperiment, boolean useWeights) throws Exception {
    List<String> outcomeList = new ArrayList<>();
    for (Instance i : instanceList) {
        outcomeList.add(i.getOutcome());
    }
    // check for error conditions
    if (outcomeList.isEmpty()) {
        throw new IllegalArgumentException("List of instance outcomes is empty.");
    }
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instanceList);
    // Make sure "outcome" is not the name of an attribute
    Attribute outcomeAttribute = createOutcomeAttribute(outcomeList, isRegressionExperiment);
    if (attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME)) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);
    Instances wekaInstances = new Instances(RELATION_NAME, attributeStore.getAttributes(), instanceList.size());
    wekaInstances.setClass(outcomeAttribute);
    // BUGFIX: previously outputFile.mkdirs() was called, which created a DIRECTORY at
    // the output file's own path and made createNewFile()/setFile() fail. Create the
    // parent directory instead, then the file itself.
    File parentDir = outputFile.getParentFile();
    if (parentDir != null && !parentDir.exists()) {
        parentDir.mkdirs();
    }
    if (!outputFile.exists()) {
        outputFile.createNewFile();
    }
    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);
    for (int i = 0; i < instanceList.size(); i++) {
        Instance instance = instanceList.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);
        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        wekaInstance.setDataset(wekaInstances);
        String outcome = outcomeList.get(i);
        if (isRegressionExperiment) {
            wekaInstance.setClassValue(Double.parseDouble(outcome));
        } else {
            wekaInstance.setClassValue(outcome);
        }
        Double instanceWeight = instance.getWeight();
        if (useWeights) {
            wekaInstance.setWeight(instanceWeight);
        }
        saver.writeIncremental(wekaInstance);
    }
    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Use of weka.core.converters.ArffSaver in project dkpro-tc by dkpro.
From the class WekaUtils, method instanceListToArffFileMultiLabel.
/**
 * Converts a feature store to a list of instances and writes them incrementally to an
 * ARFF file. Multi-label case: each outcome label becomes a 0/1 class attribute.
 *
 * @param outputFile
 *            the output file
 * @param instances
 *            the instances to convert
 * @param useDenseInstances
 *            dense features
 * @param useWeights
 *            use weights
 * @throws Exception
 *             in case of errors
 */
public static void instanceListToArffFileMultiLabel(File outputFile, List<Instance> instances, boolean useDenseInstances, boolean useWeights) throws Exception {
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instances);
    List<String> outcomes = new ArrayList<>();
    for (Instance i : instances) {
        outcomes.add(i.getOutcome());
    }
    List<Attribute> outcomeAttributes = createOutcomeAttributes(new ArrayList<String>(outcomes));
    // in Meka, class label attributes have to go on top
    for (Attribute attribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(attribute.name(), attribute);
    }
    // for Meka-internal use: the relation name carries the "-C <n>" option
    Instances wekaInstances = new Instances(RELATION_NAME + ": -C " + outcomeAttributes.size() + " ", attributeStore.getAttributes(), instances.size());
    wekaInstances.setClassIndex(outcomeAttributes.size());
    // BUGFIX: previously outputFile.mkdirs() was called, which created a DIRECTORY at
    // the output file's own path and made createNewFile()/setFile() fail. Create the
    // parent directory instead, then the file itself.
    File parentDir = outputFile.getParentFile();
    if (parentDir != null && !parentDir.exists()) {
        parentDir.mkdirs();
    }
    if (!outputFile.exists()) {
        outputFile.createNewFile();
    }
    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);
    for (int i = 0; i < instances.size(); i++) {
        Instance instance = instances.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);
        // set class label values: 1.0 if the instance carries the label, else 0.0
        List<String> instanceOutcome = instance.getOutcomes();
        for (Attribute label : outcomeAttributes) {
            String labelname = label.name();
            featureValues[attributeStore.getAttributeOffset(labelname)] = instanceOutcome.contains(labelname.split(CLASS_ATTRIBUTE_PREFIX)[1]) ? 1.0d : 0.0d;
        }
        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        wekaInstance.setDataset(wekaInstances);
        Double instanceWeight = instance.getWeight();
        if (useWeights) {
            wekaInstance.setWeight(instanceWeight);
        }
        saver.writeIncremental(wekaInstance);
    }
    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Use of weka.core.converters.ArffSaver in project dkpro-tc by dkpro.
From the class MekaDataWriter, method initalConfiguration.
/**
 * Lazily sets up the incremental ARFF saver and the Meka header {@link Instances}.
 * Subsequent calls return the cached header without reconfiguring anything.
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // Already configured on an earlier call — reuse the cached header.
    if (saver != null) {
        return masterInstance;
    }
    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);
    attributeStore = new AttributeStore();
    // Rebuild the attribute store from the persisted feature-description file
    // (tab-separated: name, type, and for NOMINAL features the enum type).
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    for (String line : FileUtils.readLines(descriptionFile, "utf-8")) {
        String[] fields = line.split("\t");
        String featureName = fields[0];
        if (attributeStore.containsAttributeName(featureName)) {
            continue;
        }
        FeatureType featureType = FeatureType.valueOf(fields[1]);
        String enumTypeName = (featureType == FeatureType.NOMINAL) ? fields[2] : null;
        Attribute attribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(featureName, featureType, enumTypeName);
        attributeStore.addAttribute(featureName, attribute);
    }
    outcomeAttributes = createOutcomeAttributes(Arrays.asList(outcomes));
    // Meka expects the class label attributes at the beginning of the attribute list.
    for (Attribute outcomeAttribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(outcomeAttribute.name(), outcomeAttribute);
    }
    // The relation name carries the "-C <n>" option for Meka-internal use.
    masterInstance = new Instances(WekaUtils.RELATION_NAME + ": -C " + outcomeAttributes.size() + " ", attributeStore.getAttributes(), instances.size());
    masterInstance.setClassIndex(outcomeAttributes.size());
    saver.setInstances(masterInstance);
    return masterInstance;
}
Use of weka.core.converters.ArffSaver in project dkpro-tc by dkpro.
From the class WekaDataWriter, method initalConfiguration.
/**
 * Lazily sets up the incremental ARFF saver and the Weka header {@link Instances}
 * for the single-label case. Subsequent calls return the cached header unchanged.
 */
private Instances initalConfiguration(Collection<Instance> instances) throws TextClassificationException, IOException {
    // Already configured on an earlier call — reuse the cached header.
    if (saver != null) {
        return masterInstance;
    }
    saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(arffTarget);
    saver.setCompressOutput(false);
    attributeStore = new AttributeStore();
    // Rebuild the attribute store from the persisted feature-description file
    // (tab-separated: name, type, and for NOMINAL features the enum type).
    File descriptionFile = new File(outputFolder, Constants.FILENAME_FEATURES_DESCRIPTION);
    for (String line : FileUtils.readLines(descriptionFile, "utf-8")) {
        String[] fields = line.split("\t");
        String featureName = fields[0];
        if (attributeStore.containsAttributeName(featureName)) {
            continue;
        }
        FeatureType featureType = FeatureType.valueOf(fields[1]);
        String enumTypeName = (featureType == FeatureType.NOMINAL) ? fields[2] : null;
        Attribute attribute = WekaFeatureEncoder.featureToAttributeUsingFeatureDescription(featureName, featureType, enumTypeName);
        attributeStore.addAttribute(featureName, attribute);
    }
    // Make sure "outcome" is not already taken by a regular feature; rename ours if so.
    outcomeAttribute = createOutcomeAttribute(Arrays.asList(outcomes), isRegression);
    if (attributeStore.containsAttributeName(CLASS_ATTRIBUTE_NAME)) {
        System.err.println("A feature with name \"outcome\" was found. Renaming outcome attribute");
        outcomeAttribute = outcomeAttribute.copy(CLASS_ATTRIBUTE_PREFIX + CLASS_ATTRIBUTE_NAME);
    }
    attributeStore.addAttribute(outcomeAttribute.name(), outcomeAttribute);
    masterInstance = new Instances(WekaUtils.RELATION_NAME, attributeStore.getAttributes(), instances.size());
    masterInstance.setClass(outcomeAttribute);
    saver.setInstances(masterInstance);
    return masterInstance;
}
Aggregations