use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaUtils method instanceListToArffFileMultiLabel.
/**
 * Writes the given instances to an ARFF file using the multi-label (Meka) layout.
 * <p>
 * One binary attribute is created per outcome label and placed in front of the feature
 * attributes. For every instance, a label attribute is set to {@code 1.0} if the instance
 * carries that label and to {@code 0.0} otherwise. Instances are written incrementally and
 * the saver is configured to compress its output.
 *
 * @param outputFile
 *            the output file; missing parent directories are created
 * @param instances
 *            the instances to convert
 * @param useDenseInstances
 *            if {@code true} a {@code DenseInstance} is written per instance, otherwise a
 *            {@code SparseInstance}
 * @param useWeights
 *            if {@code true} the weight of each instance is copied to the written instance
 * @throws Exception
 *             in case of errors
 */
public static void instanceListToArffFileMultiLabel(File outputFile, List<Instance> instances, boolean useDenseInstances, boolean useWeights) throws Exception {
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instances);

    // NOTE(review): only getOutcome() is collected per instance although this is the
    // multi-label path and getOutcomes() is used further below - confirm that labels which
    // are not returned by getOutcome() cannot get lost here.
    List<String> outcomes = new ArrayList<>();
    for (Instance i : instances) {
        outcomes.add(i.getOutcome());
    }
    List<Attribute> outcomeAttributes = createOutcomeAttributes(new ArrayList<String>(outcomes));

    // in Meka, class label attributes have to go on top
    for (Attribute attribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(attribute.name(), attribute);
    }

    // the "-C <number of labels>" suffix of the relation name is for Meka-internal use
    Instances wekaInstances = new Instances(RELATION_NAME + ": -C " + outcomeAttributes.size() + " ", attributeStore.getAttributes(), instances.size());
    wekaInstances.setClassIndex(outcomeAttributes.size());

    if (!outputFile.exists()) {
        // Create the parent directories only. Calling mkdirs() on the file itself would
        // create a directory at the target path and prevent the file from being created.
        File parentDir = outputFile.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        outputFile.createNewFile();
    }

    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);

    for (Instance instance : instances) {
        double[] featureValues = getFeatureValues(attributeStore, instance);

        // set class label values
        List<String> instanceOutcome = instance.getOutcomes();
        for (Attribute label : outcomeAttributes) {
            String labelname = label.name();
            // NOTE(review): split() interprets CLASS_ATTRIBUTE_PREFIX as a regular expression
            // and element [1] assumes the prefix occurs exactly once at the start of the
            // attribute name - verify against createOutcomeAttributes.
            featureValues[attributeStore.getAttributeOffset(labelname)] = instanceOutcome.contains(labelname.split(CLASS_ATTRIBUTE_PREFIX)[1]) ? 1.0d : 0.0d;
        }

        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        wekaInstance.setDataset(wekaInstances);
        if (useWeights) {
            wekaInstance.setWeight(instance.getWeight());
        }
        saver.writeIncremental(wekaInstance);
    }

    // finishes the incremental saving process
    saver.writeIncremental(null);
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class MekaDataWriter method transformFromGeneric.
/**
 * Reads the generic feature file from the output folder line by line, restores the
 * instances stored as a JSON array on each line and passes them to
 * {@link #writeClassifierFormat(Collection)}. After all lines have been processed the generic
 * feature file is removed via {@code FileUtils.deleteQuietly}.
 *
 * @throws Exception
 *             if reading the file, restoring the instances or writing them fails; the
 *             generic feature file is left in place in that case
 */
@Override
public void transformFromGeneric() throws Exception {
    File genericFile = new File(outputFolder, GENERIC_FEATURE_FILE);

    // try-with-resources closes the reader even if parsing or writing a line fails
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(genericFile), "utf-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            Instance[] restoredInstances = gson.fromJson(line, Instance[].class);
            writeClassifierFormat(Arrays.asList(restoredInstances));
        }
    }

    FileUtils.deleteQuietly(genericFile);
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class MekaDataWriter method writeClassifierFormat.
/**
 * Encodes the given instances in the multi-label layout and writes them one by one through
 * the incremental saver.
 * <p>
 * For each instance the feature values are computed, every outcome attribute is set to
 * {@code 1.0} if the instance carries the corresponding label and to {@code 0.0} otherwise,
 * and the resulting sparse or dense Weka instance is attached to the {@code Instances}
 * object returned by {@code initalConfiguration} before it is written.
 *
 * @param instances
 *            the instances to write
 * @throws AnalysisEngineProcessException
 *             wraps any exception raised while encoding or writing
 */
@Override
public void writeClassifierFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        Instances dataset = initalConfiguration(instances);

        for (Instance current : instances) {
            double[] values = getFeatureValues(attributeStore, current);

            // mark which of the known labels are assigned to this instance
            List<String> assignedLabels = current.getOutcomes();
            for (Attribute labelAttribute : outcomeAttributes) {
                String attributeName = labelAttribute.name();
                int slot = attributeStore.getAttributeOffset(attributeName);
                String plainLabel = attributeName.split(CLASS_ATTRIBUTE_PREFIX)[1];
                if (assignedLabels.contains(plainLabel)) {
                    values[slot] = 1.0d;
                } else {
                    values[slot] = 0.0d;
                }
            }

            weka.core.Instance encoded;
            if (!useSparse) {
                encoded = new DenseInstance(1.0, values);
            } else {
                encoded = new SparseInstance(1.0, values);
            }
            encoded.setDataset(dataset);

            Double weight = current.getWeight();
            if (applyWeighting) {
                encoded.setWeight(weight);
            }

            saver.writeIncremental(encoded);
        }
    } catch (Exception cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaDataWriter method writeClassifierFormat.
/**
 * Encodes the given instances as single-outcome Weka instances and writes them one by one
 * through the incremental saver.
 * <p>
 * The class value is the instance outcome: parsed as a double when {@code isRegression} is
 * set, otherwise passed on as a string. The instance weight is applied only when
 * {@code applyWeighting} is set.
 *
 * @param instances
 *            the instances to write
 * @throws AnalysisEngineProcessException
 *             wraps any exception raised while encoding or writing
 */
@Override
public void writeClassifierFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        Instances dataset = initalConfiguration(instances);

        for (Instance current : instances) {
            double[] values = getFeatureValues(attributeStore, current);

            weka.core.Instance encoded;
            if (!useSparse) {
                encoded = new DenseInstance(1.0, values);
            } else {
                encoded = new SparseInstance(1.0, values);
            }
            encoded.setDataset(dataset);

            // numeric target for regression, nominal label otherwise
            String label = current.getOutcome();
            if (!isRegression) {
                encoded.setClassValue(label);
            } else {
                encoded.setClassValue(Double.parseDouble(label));
            }

            Double weight = current.getWeight();
            if (applyWeighting) {
                encoded.setWeight(weight);
            }

            saver.writeIncremental(encoded);
        }
    } catch (Exception cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaDataWriter method transformFromGeneric.
/**
 * Reads the generic feature file from the output folder line by line, restores the
 * instances stored as a JSON array on each line and passes them to
 * {@link #writeClassifierFormat(Collection)}. After all lines have been processed the generic
 * feature file is removed via {@code FileUtils.deleteQuietly}.
 *
 * @throws Exception
 *             if reading the file, restoring the instances or writing them fails; the
 *             generic feature file is left in place in that case
 */
@Override
public void transformFromGeneric() throws Exception {
    File genericFile = new File(outputFolder, GENERIC_FEATURE_FILE);

    // try-with-resources closes the reader even if parsing or writing a line fails
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(genericFile), "utf-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            Instance[] restoredInstances = gson.fromJson(line, Instance[].class);
            writeClassifierFormat(Arrays.asList(restoredInstances));
        }
    }

    FileUtils.deleteQuietly(genericFile);
}
Aggregations