Use of weka.core.Attribute in project lobcder by skoulouzis.
Class LDClustering, method initAttributes.
/**
 * Creates the sixteen Weka attribute definitions describing a metadata record and
 * stores them, in index order, in {@code metdataAttributes}.
 *
 * <p>Attribute kinds used:
 * <ul>
 * <li>numeric: uid, length, parentRef</li>
 * <li>nominal: verb (one value per {@code Request.Method} code), type, supervised</li>
 * <li>date ({@code yyyy-MM-dd HH:mm:ss}): createDate, validationDate, modifiedDate</li>
 * <li>declared with a {@code null} value list (Weka treats these as string
 * attributes): checksum, contentType, locationPreference, description, name,
 * status, owner</li>
 * </ul>
 *
 * @throws ParseException declared by the original signature
 * @throws Exception declared by the original signature
 */
private void initAttributes() throws ParseException, Exception {
    final String timestampPattern = "yyyy-MM-dd HH:mm:ss";
    // A null value list selects the string-attribute flavour of the constructor.
    final FastVector noValues = null;

    // Nominal values for the request verb: one entry per known method code.
    Request.Method[] methods = Request.Method.values();
    FastVector verbValues = new FastVector(methods.length);
    for (int i = 0; i < methods.length; i++) {
        verbValues.addElement(methods[i].code);
    }

    // Nominal values for the logical resource type.
    FastVector typeValues = new FastVector(3);
    typeValues.addElement(nl.uva.cs.lobcder.util.Constants.LOGICAL_DATA);
    typeValues.addElement(nl.uva.cs.lobcder.util.Constants.LOGICAL_FILE);
    typeValues.addElement(nl.uva.cs.lobcder.util.Constants.LOGICAL_FOLDER);

    // Nominal values for the class attribute.
    FastVector supervisedValues = new FastVector(2);
    supervisedValues.addElement("true");
    supervisedValues.addElement("false");

    // The array order is the attribute index order (0..15); initializers are
    // evaluated left to right, so each attribute receives its own position.
    int position = 0;
    Attribute[] orderedAttributes = {
        new Attribute("uid", position++),
        new Attribute("verb", verbValues, position++),
        new Attribute("checksum", noValues, position++),
        new Attribute("contentType", noValues, position++),
        new Attribute("createDate", timestampPattern, position++),
        new Attribute("locationPreference", noValues, position++),
        new Attribute("description", noValues, position++),
        new Attribute("validationDate", timestampPattern, position++),
        new Attribute("length", position++),
        new Attribute("modifiedDate", timestampPattern, position++),
        new Attribute("name", noValues, position++),
        new Attribute("parentRef", position++),
        new Attribute("status", noValues, position++),
        new Attribute("type", typeValues, position++),
        new Attribute("supervised", supervisedValues, position++),
        new Attribute("owner", noValues, position++)
    };

    // Publish the feature vector only after every attribute has been built.
    metdataAttributes = new FastVector();
    for (Attribute attribute : orderedAttributes) {
        metdataAttributes.addElement(attribute);
    }
}
Use of weka.core.Attribute in project dkpro-tc by dkpro.
Class WekaUtils, method instanceListToArffFileMultiLabel.
/**
 * Writes a list of TC instances to an ARFF file in the multi-label (Meka) layout:
 * the class-label attributes come first, followed by the feature attributes.
 * Instances are written incrementally and the saver is asked to compress its output.
 *
 * @param outputFile
 *            the output file; it is created (together with any missing parent
 *            directories) when it does not exist yet
 * @param instances
 *            the instances to convert
 * @param useDenseInstances
 *            if {@code true} each row is written as a {@code DenseInstance},
 *            otherwise as a {@code SparseInstance}
 * @param useWeights
 *            if {@code true} the weight of each TC instance is copied to the
 *            written Weka instance
 * @throws Exception
 *             in case of errors
 */
public static void instanceListToArffFileMultiLabel(File outputFile, List<Instance> instances, boolean useDenseInstances, boolean useWeights) throws Exception {
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instances);

    // NOTE(review): only getOutcome() of each instance feeds the label attributes,
    // while getOutcomes() is consulted per instance below - confirm that every
    // label is reachable through getOutcome().
    List<String> outcomes = new ArrayList<>();
    for (Instance i : instances) {
        outcomes.add(i.getOutcome());
    }
    List<Attribute> outcomeAttributes = createOutcomeAttributes(new ArrayList<String>(outcomes));

    // in Meka, class label attributes have to go on top
    for (Attribute attribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(attribute.name(), attribute);
    }

    // for Meka-internal use: the relation name carries the number of labels ("-C n")
    Instances wekaInstances = new Instances(RELATION_NAME + ": -C " + outcomeAttributes.size() + " ", attributeStore.getAttributes(), instances.size());
    wekaInstances.setClassIndex(outcomeAttributes.size());

    if (!outputFile.exists()) {
        // Create the missing *parent* directories. Calling mkdirs() on the output
        // file itself would create a directory at the file's own path.
        File parentDir = outputFile.getAbsoluteFile().getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        outputFile.createNewFile();
    }

    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);

    for (int i = 0; i < instances.size(); i++) {
        Instance instance = instances.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);

        // set class label values: 1.0 if the instance carries the label, else 0.0
        List<String> instanceOutcome = instance.getOutcomes();
        for (Attribute label : outcomeAttributes) {
            String labelname = label.name();
            featureValues[attributeStore.getAttributeOffset(labelname)] = instanceOutcome.contains(labelname.split(CLASS_ATTRIBUTE_PREFIX)[1]) ? 1.0d : 0.0d;
        }

        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        wekaInstance.setDataset(wekaInstances);
        if (useWeights) {
            wekaInstance.setWeight(instance.getWeight());
        }
        saver.writeIncremental(wekaInstance);
    }

    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Use of weka.core.Attribute in project dkpro-tc by dkpro.
Class WekaUtils, method tcInstanceToMekaInstance.
/**
 * Builds a sparse Meka-style instance from a TC instance. The attribute layout is
 * the label attributes derived from {@code allClassLabels} followed by the
 * non-label attributes of {@code trainingData}.
 *
 * <p>Side effect: the class index of {@code trainingData} is set to the number of
 * label attributes, and the returned instance is attached to that dataset.
 *
 * @param instance
 *            tc instance
 * @param trainingData
 *            training data
 * @param allClassLabels
 *            all labels
 * @return weka instance
 * @throws Exception
 *             in case of errors
 */
public static weka.core.Instance tcInstanceToMekaInstance(Instance instance, Instances trainingData, List<String> allClassLabels) throws Exception {
    AttributeStore store = new AttributeStore();

    // in Meka, class label attributes have to go on top
    List<Attribute> labelAttributes = createOutcomeAttributes(allClassLabels);
    for (Attribute labelAttribute : labelAttributes) {
        store.addAttributeAtBegin(labelAttribute.name(), labelAttribute);
    }

    // Everything after the label block in the training data is a feature attribute.
    int attributeIndex = labelAttributes.size();
    while (attributeIndex < trainingData.numAttributes()) {
        Attribute featureAttribute = trainingData.attribute(attributeIndex);
        store.addAttribute(trainingData.attribute(attributeIndex).name(), featureAttribute);
        attributeIndex++;
    }

    double[] values = getFeatureValues(store, instance);
    weka.core.Instance mekaInstance = new SparseInstance(1.0, values);

    trainingData.setClassIndex(labelAttributes.size());
    mekaInstance.setDataset(trainingData);
    return mekaInstance;
}
Use of weka.core.Attribute in project dkpro-tc by dkpro.
Class WekaUtils, method tcInstanceToWekaInstance.
/**
 * Builds a sparse Weka instance from a TC instance. All attributes of
 * {@code trainingData} except the last one are reused as they are; the last
 * position is filled with a freshly created outcome attribute. The returned
 * instance is attached to {@code trainingData}.
 *
 * @param instance
 *            tc instance
 * @param trainingData
 *            training data
 * @param allClasses
 *            all classes
 * @param isRegressionExperiment
 *            is regression
 * @return weka instance
 * @throws Exception
 *             in case of errors
 */
public static weka.core.Instance tcInstanceToWekaInstance(Instance instance, Instances trainingData, List<String> allClasses, boolean isRegressionExperiment) throws Exception {
    AttributeStore store = new AttributeStore();

    // Copy every attribute but the trailing outcome attribute, which is replaced below.
    int attributeIndex = 0;
    while (attributeIndex < trainingData.numAttributes() - 1) {
        Attribute featureAttribute = trainingData.attribute(attributeIndex);
        store.addAttribute(trainingData.attribute(attributeIndex).name(), featureAttribute);
        attributeIndex++;
    }

    // The outcome attribute goes last.
    Attribute outcome = createOutcomeAttribute(allClasses, isRegressionExperiment);
    store.addAttribute(outcome.name(), outcome);

    weka.core.Instance wekaInstance = new SparseInstance(1.0, getFeatureValues(store, instance));
    wekaInstance.setDataset(trainingData);
    return wekaInstance;
}
Use of weka.core.Attribute in project dkpro-tc by dkpro.
Class WekaUtils, method featureSelectionMultilabel.
/**
 * Feature selection using Mulan. Ranks all feature attributes with the configured
 * evaluator, writes the per-attribute scores to the feature selection results file
 * and returns a {@code Remove} filter that keeps the top-ranked feature attributes
 * together with all label attributes.
 *
 * @param aContext
 *            Lab context
 * @param trainData
 *            training data
 * @param attributeEvaluator
 *            evaluator: the first element is the evaluator class name, the
 *            remaining elements are its options
 * @param labelTransformationMethod
 *            transformation method; {@code "LabelPowersetAttributeEvaluator"} and
 *            {@code "BinaryRelevanceAttributeEvaluator"} are supported
 * @param numLabelsToKeep
 *            number of top-ranked feature attributes to keep (the label attributes
 *            are always kept in addition)
 * @return the configured remove filter, or {@code null} if fewer attributes were
 *         ranked than should be kept (no filtering)
 * @throws TextClassificationException
 *             in case of errors, including an unsupported transformation method
 */
public static Remove featureSelectionMultilabel(TaskContext aContext, Instances trainData, List<String> attributeEvaluator, String labelTransformationMethod, int numLabelsToKeep) throws TextClassificationException {
    // file to hold the results of attribute selection
    File fsResultsFile = getFile(aContext, TEST_TASK_OUTPUT_KEY, WekaTestTask.featureSelectionFile, AccessMode.READWRITE);
    // filter for reducing dimension of attributes
    Remove filterRemove = new Remove();
    try {
        MultiLabelInstances mulanInstances = convertMekaInstancesToMulanInstances(trainData);
        ASEvaluation eval = ASEvaluation.forName(attributeEvaluator.get(0), attributeEvaluator.subList(1, attributeEvaluator.size()).toArray(new String[0]));
        AttributeEvaluator attributeSelectionFilter;
        // Only these Mulan label transformation methods are supported (configuration
        // is complicated due to missing commandline support of mulan):
        if (labelTransformationMethod.equals("LabelPowersetAttributeEvaluator")) {
            attributeSelectionFilter = new LabelPowersetAttributeEvaluator(eval, mulanInstances);
        } else if (labelTransformationMethod.equals("BinaryRelevanceAttributeEvaluator")) {
            attributeSelectionFilter = new BinaryRelevanceAttributeEvaluator(eval, mulanInstances, "max", "none", "rank");
        } else {
            throw new TextClassificationException("This Label Transformation Method is not supported.");
        }
        Ranker r = new Ranker();
        int[] result = r.search(attributeSelectionFilter, mulanInstances);
        int numLabels = mulanInstances.getNumLabels();

        // collect evaluation for *all* attributes and write to file
        StringBuilder evalFile = new StringBuilder();
        for (Attribute att : mulanInstances.getFeatureAttributes()) {
            // the evaluator is addressed by feature position, i.e. without the leading labels
            evalFile.append(att.name())
                    .append(": ")
                    .append(attributeSelectionFilter.evaluateAttribute(att.index() - numLabels))
                    .append("\n");
        }
        FileUtils.writeStringToFile(fsResultsFile, evalFile.toString(), "utf-8");

        // less attributes than we want => no filtering
        // (checked explicitly instead of waiting for arraycopy to fail)
        if (numLabelsToKeep > result.length) {
            return null;
        }

        // create a filter to reduce the dimension of the attributes:
        // the top-ranked features first, then every label index
        int[] toKeep = new int[numLabelsToKeep + numLabels];
        System.arraycopy(result, 0, toKeep, 0, numLabelsToKeep);
        int[] labelIndices = mulanInstances.getLabelIndices();
        System.arraycopy(labelIndices, 0, toKeep, numLabelsToKeep, numLabels);
        filterRemove.setAttributeIndicesArray(toKeep);
        // inverted selection: the listed indices are the ones that are kept
        filterRemove.setInvertSelection(true);
        filterRemove.setInputFormat(mulanInstances.getDataSet());
    } catch (ArrayIndexOutOfBoundsException e) {
        // Retained for backward compatibility: any array-bounds failure inside the
        // selection is still reported as "no filtering" rather than as an error.
        return null;
    } catch (Exception e) {
        throw new TextClassificationException(e);
    }
    return filterRemove;
}
Aggregations