use of weka.core.Instances in project dkpro-tc by dkpro.
the class WekaUtilTest method tcInstanceToWekaInstanceTest.
/**
 * Maps two TC instances with different feature sets onto one shared Weka header and
 * checks that both converted instances report equal headers with five attributes.
 */
@Test
public void tcInstanceToWekaInstanceTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    // Header: "feature5" is set by neither instance below, and "feature3_{{" (set by
    // both instances) has no attribute in the header.
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("feature5"));
    attributes.add(new Attribute("feature2"));
    attributes.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    attributes.add(new Attribute("feature1"));
    attributes.add(new Attribute("outcome", outcomeValues));
    Instances trainingData = new Instances("test", attributes, 0);

    Instance first = new Instance();
    first.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    Instance second = new Instance();
    second.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    second.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    second.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    weka.core.Instance convertedFirst = WekaUtils.tcInstanceToWekaInstance(first, trainingData,
            outcomeValues, false);
    weka.core.Instance convertedSecond = WekaUtils.tcInstanceToWekaInstance(second, trainingData,
            outcomeValues, false);

    assertEquals(true, convertedFirst.equalHeaders(convertedSecond));
    assertEquals(5, convertedFirst.numAttributes());

    // attach each converted instance to its own dataset and dump the datasets for inspection
    convertedFirst.dataset().add(convertedFirst);
    convertedSecond.dataset().add(convertedSecond);
    System.out.println(convertedFirst.dataset() + "\n");
    System.out.println(convertedSecond.dataset() + "\n");
}
use of weka.core.Instances in project dkpro-tc by dkpro.
the class WekaUtilTest method tcInstanceToWekaInstanceFailTest.
/**
 * Expects the conversion to be rejected with an {@link IllegalArgumentException} when the
 * instance does not fit the training header.
 */
@Test(expected = IllegalArgumentException.class)
public void tcInstanceToWekaInstanceFailTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    // NOTE(review): the header declares "feature4" with values val_4/val_2 only, while the
    // instance carries "val_1" - presumably this mismatch triggers the exception; confirm.
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("feature2"));
    attributes.add(new Attribute("feature4", Arrays.asList("val_4", "val_2")));
    attributes.add(new Attribute("outcome", outcomeValues));
    Instances trainingData = new Instances("test", attributes, 0);

    Instance mismatching = new Instance();
    mismatching.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    mismatching.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    mismatching.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    // the result is irrelevant; only the thrown exception matters
    WekaUtils.tcInstanceToWekaInstance(mismatching, trainingData, outcomeValues, false);
}
use of weka.core.Instances in project dkpro-tc by dkpro.
the class WekaOutcomeIDReport method execute.
/**
 * Reads the stored predictions, builds the report properties (multi-label or
 * single-label/regression variant) and writes them as UTF-8 to the target output file.
 *
 * @throws Exception
 *             if loading the predictions, building the properties or writing/closing
 *             the output file fails
 */
@Override
public void execute() throws Exception {
    init();

    File arff = WekaUtils.getFile(getContext(), "", FILENAME_PREDICTIONS, AccessMode.READONLY);
    mlResults = WekaUtils.getFile(getContext(), "", WekaTestTask.evaluationBin, AccessMode.READONLY);

    Instances predictions = WekaUtils.getInstances(arff, isMultiLabel);
    List<String> labels = getLabels(isMultiLabel, isRegression);

    Properties props;
    if (isMultiLabel) {
        MultilabelResult r = WekaUtils.readMlResultFromFile(mlResults);
        props = generateMlProperties(predictions, labels, r);
    } else {
        Map<Integer, String> documentIdMap = loadDocumentMap();
        props = generateSlProperties(predictions, isRegression, isUnit, documentIdMap, labels);
    }

    // try-with-resources instead of closeQuietly: a failing close() on the report file
    // is reported to the caller instead of being silently discarded
    try (FileWriterWithEncoding fw = new FileWriterWithEncoding(getTargetOutputFile(), "utf-8")) {
        props.store(fw, generateHeader(labels));
    }
}
use of weka.core.Instances in project dkpro-tc by dkpro.
the class WekaSerliazeModelConnector method writeWekaSpecificInformation.
/**
 * Trains the configured classifier on the training data and stores the Weka-specific
 * model artifacts in {@code outputFolder}: the empty training-data header, the serialized
 * classifier and - unless in regression mode - the class labels.
 *
 * @param aContext
 *            task context used to locate the training data folder
 * @throws Exception
 *             if feature selection is configured (not supported in save-model mode) or
 *             if reading, training or writing fails
 */
private void writeWekaSpecificInformation(TaskContext aContext) throws Exception {
    boolean isMultiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);
    boolean isRegression = learningMode.equals(Constants.LM_REGRESSION);

    File arffFileTrain = new File(
            aContext.getFolder(TEST_TASK_INPUT_KEY_TRAINING_DATA, AccessMode.READONLY).getPath()
                    + "/" + Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT);
    Instances trainData = WekaUtils.getInstances(arffFileTrain, isMultiLabel);
    trainData = WekaUtils.removeInstanceId(trainData, isMultiLabel);

    // FEATURE SELECTION
    if (!isMultiLabel) {
        if (featureSearcher != null && attributeEvaluator != null) {
            throw new Exception("Feature Selection is currently not supported in Save Model mode.");
        }
    } else {
        if (attributeEvaluator != null && labelTransformationMethod != null && numLabelsToKeep > 0) {
            throw new Exception("Feature Selection is currently not supported in Save Model mode.");
        }
    }

    // write training data header; both streams are closed even if serialization fails
    // NOTE(review): this writes into outputFolder before mkdir(model.getParentFile()) is
    // called below - presumably the folder already exists at this point; confirm.
    try (FileOutputStream fileOut = new FileOutputStream(new File(outputFolder, "training_data"));
            ObjectOutputStream outT = new ObjectOutputStream(fileOut)) {
        Instances emptyTrainCopy = new Instances(trainData);
        emptyTrainCopy.delete();
        outT.writeObject(emptyTrainCopy);
    }

    // write model file
    Classifier cl = WekaUtils.getClassifier(learningMode, classificationArguments);
    cl.buildClassifier(trainData);
    File model = new File(outputFolder, MODEL_CLASSIFIER);
    mkdir(model.getParentFile());
    weka.core.SerializationHelper.write(model.getAbsolutePath(), cl);

    // write class labels file (no class labels are written in regression mode)
    if (!isRegression) {
        List<String> classLabels = WekaUtils.getClassLabels(trainData, isMultiLabel);
        String classLabelsString = StringUtils.join(classLabels, "\n");
        FileUtils.writeStringToFile(new File(outputFolder, MODEL_CLASS_LABELS), classLabelsString, "utf-8");
    }
}
use of weka.core.Instances in project dkpro-tc by dkpro.
the class WekaUtils method instanceListToArffFileMultiLabel.
/**
 * Writes a list of instances to an ARFF file, one instance at a time, through an
 * {@link ArffSaver} with compressed output enabled. Multi-label case: every outcome
 * attribute is placed in front of the feature attributes and set to 1.0 or 0.0 per
 * instance, depending on whether the instance carries that label.
 *
 * @param outputFile
 *            the output file
 * @param instances
 *            the instances to convert
 * @param useDenseInstances
 *            if true, dense Weka instances are written, otherwise sparse ones
 * @param useWeights
 *            if true, the weight of each instance is transferred to the Weka instance
 * @throws Exception
 *             in case of errors
 */
public static void instanceListToArffFileMultiLabel(File outputFile, List<Instance> instances, boolean useDenseInstances, boolean useWeights) throws Exception {
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instances);

    // NOTE(review): only getOutcome() is collected here, while the per-instance labels
    // below are read via getOutcomes(); confirm that labels never returned by
    // getOutcome() are intentionally left without an outcome attribute.
    List<String> outcomes = new ArrayList<>();
    for (Instance i : instances) {
        outcomes.add(i.getOutcome());
    }
    List<Attribute> outcomeAttributes = createOutcomeAttributes(new ArrayList<String>(outcomes));

    // in Meka, class label attributes have to go on top
    for (Attribute attribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(attribute.name(), attribute);
    }

    // for Meka-internal use
    Instances wekaInstances = new Instances(RELATION_NAME + ": -C " + outcomeAttributes.size() + " ", attributeStore.getAttributes(), instances.size());
    wekaInstances.setClassIndex(outcomeAttributes.size());

    if (!outputFile.exists()) {
        // create the parent directory, not a directory at the path of the output file itself
        File parentDir = outputFile.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        outputFile.createNewFile();
    }

    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);

    for (int i = 0; i < instances.size(); i++) {
        Instance instance = instances.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);

        // set class label values
        List<String> instanceOutcome = instance.getOutcomes();
        for (Attribute label : outcomeAttributes) {
            String labelname = label.name();
            // the part after the class attribute prefix is compared with the instance's outcomes
            featureValues[attributeStore.getAttributeOffset(labelname)] = instanceOutcome.contains(labelname.split(CLASS_ATTRIBUTE_PREFIX)[1]) ? 1.0d : 0.0d;
        }

        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        wekaInstance.setDataset(wekaInstances);

        if (useWeights) {
            wekaInstance.setWeight(instance.getWeight());
        }

        saver.writeIncremental(wekaInstance);
    }

    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Aggregations