Search in sources :

Example 11 with Attribute

use of weka.core.Attribute in project lobcder by skoulouzis.

the class LDClustering method initAttributes.

/**
 * Builds the Weka attribute definitions for one metadata record and stores
 * them in {@code metdataAttributes}.
 *
 * <p>Each attribute is created with an explicit index that equals its
 * position in the resulting vector (0 to 15). The vector is assembled in a
 * local variable and only assigned to the field once every attribute has
 * been created.
 *
 * @throws ParseException declared by the signature; not thrown directly by this body
 * @throws Exception declared by the signature; not thrown directly by this body
 */
private void initAttributes() throws ParseException, Exception {
    final String timestampPattern = "yyyy-MM-dd HH:mm:ss";
    final FastVector attributes = new FastVector();
    int position = 0;

    // 0: uid
    attributes.addElement(new Attribute("uid", position++));

    // 1: verb - nominal, one value per request method code
    Request.Method[] methods = Request.Method.values();
    FastVector verbValues = new FastVector(methods.length);
    for (int k = 0; k < methods.length; k++) {
        verbValues.addElement(methods[k].code);
    }
    attributes.addElement(new Attribute("verb", verbValues, position++));

    // 2: checksum - created with a null value list
    attributes.addElement(new Attribute("checksum", (FastVector) null, position++));
    // 3: contentType
    attributes.addElement(new Attribute("contentType", (FastVector) null, position++));
    // 4: createDate - created with a date format string
    attributes.addElement(new Attribute("createDate", timestampPattern, position++));
    // 5: locationPreference
    attributes.addElement(new Attribute("locationPreference", (FastVector) null, position++));
    // 6: description
    attributes.addElement(new Attribute("description", (FastVector) null, position++));
    // 7: validationDate
    attributes.addElement(new Attribute("validationDate", timestampPattern, position++));
    // 8: length
    attributes.addElement(new Attribute("length", position++));
    // 9: modifiedDate
    attributes.addElement(new Attribute("modifiedDate", timestampPattern, position++));
    // 10: the attribute is registered under the name "name"
    attributes.addElement(new Attribute("name", (FastVector) null, position++));
    // 11: parentRef
    attributes.addElement(new Attribute("parentRef", position++));
    // 12: status
    attributes.addElement(new Attribute("status", (FastVector) null, position++));

    // 13: type - nominal over the three logical data kinds
    FastVector typeValues = new FastVector(3);
    typeValues.addElement(nl.uva.cs.lobcder.util.Constants.LOGICAL_DATA);
    typeValues.addElement(nl.uva.cs.lobcder.util.Constants.LOGICAL_FILE);
    typeValues.addElement(nl.uva.cs.lobcder.util.Constants.LOGICAL_FOLDER);
    attributes.addElement(new Attribute("type", typeValues, position++));

    // 14: supervised - the class attribute, nominal over "true"/"false"
    FastVector supervisedValues = new FastVector(2);
    supervisedValues.addElement("true");
    supervisedValues.addElement("false");
    attributes.addElement(new Attribute("supervised", supervisedValues, position++));

    // 15: owner
    attributes.addElement(new Attribute("owner", (FastVector) null, position++));

    // Publish the finished feature vector
    metdataAttributes = attributes;
}
Also used : FastVector(weka.core.FastVector) Attribute(weka.core.Attribute) Method(io.milton.http.Request.Method) Request(io.milton.http.Request)

Example 12 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaUtils method instanceListToArffFileMultiLabel.

/**
 * Writes a list of TC instances to a compressed ARFF file using the
 * multi-label layout, in which the class label attributes are placed before
 * the feature attributes.
 *
 * <p>Instances are written incrementally, one at a time, through an
 * {@link ArffSaver}. For every outcome attribute the corresponding value is
 * set to {@code 1.0} if the instance carries that label and {@code 0.0}
 * otherwise.
 *
 * @param outputFile
 *            the output file; its parent directory is created if missing
 * @param instances
 *            the instances to convert
 * @param useDenseInstances
 *            if {@code true} a {@link DenseInstance} is written per instance,
 *            otherwise a {@link SparseInstance}
 * @param useWeights
 *            if {@code true} the weight of each TC instance is copied to the
 *            written instance
 * @throws Exception
 *             in case of errors
 */
public static void instanceListToArffFileMultiLabel(File outputFile, List<Instance> instances, boolean useDenseInstances, boolean useWeights) throws Exception {
    AttributeStore attributeStore = WekaFeatureEncoder.getAttributeStore(instances);
    // NOTE(review): only getOutcome() is collected here, while the per-instance
    // labels below are read through getOutcomes() - confirm that every label
    // is guaranteed to show up in this list.
    List<String> outcomes = new ArrayList<>();
    for (Instance i : instances) {
        outcomes.add(i.getOutcome());
    }
    List<Attribute> outcomeAttributes = createOutcomeAttributes(new ArrayList<String>(outcomes));
    // in Meka, class label attributes have to go on top
    for (Attribute attribute : outcomeAttributes) {
        attributeStore.addAttributeAtBegin(attribute.name(), attribute);
    }
    // the "-C <n>" suffix of the relation name is for Meka-internal use
    Instances wekaInstances = new Instances(RELATION_NAME + ": -C " + outcomeAttributes.size() + " ", attributeStore.getAttributes(), instances.size());
    wekaInstances.setClassIndex(outcomeAttributes.size());
    if (!outputFile.exists()) {
        // Create the *parent* directory; calling mkdirs() on the file itself
        // would create a directory at the file's own path.
        File parentDir = outputFile.getAbsoluteFile().getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        outputFile.createNewFile();
    }
    ArffSaver saver = new ArffSaver();
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(outputFile);
    saver.setCompressOutput(true);
    saver.setInstances(wekaInstances);
    for (int i = 0; i < instances.size(); i++) {
        Instance instance = instances.get(i);
        double[] featureValues = getFeatureValues(attributeStore, instance);
        // set class label values
        List<String> instanceOutcome = instance.getOutcomes();
        for (Attribute label : outcomeAttributes) {
            String labelname = label.name();
            // the attribute name is split on CLASS_ATTRIBUTE_PREFIX and the
            // second part is compared against the instance's outcomes
            featureValues[attributeStore.getAttributeOffset(labelname)] = instanceOutcome.contains(labelname.split(CLASS_ATTRIBUTE_PREFIX)[1]) ? 1.0d : 0.0d;
        }
        weka.core.Instance wekaInstance;
        if (useDenseInstances) {
            wekaInstance = new DenseInstance(1.0, featureValues);
        } else {
            wekaInstance = new SparseInstance(1.0, featureValues);
        }
        wekaInstance.setDataset(wekaInstances);
        if (useWeights) {
            // the weight is only read when it is actually applied
            wekaInstance.setWeight(instance.getWeight());
        }
        saver.writeIncremental(wekaInstance);
    }
    // finishes the incremental saving process
    saver.writeIncremental(null);
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) ArffSaver(weka.core.converters.ArffSaver) Instances(weka.core.Instances) MultiLabelInstances(mulan.data.MultiLabelInstances)

Example 13 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaUtils method tcInstanceToMekaInstance.

/**
 * Builds a Meka-style instance from a TC instance. The attribute layout is
 * reconstructed as: one outcome attribute per class label first, followed by
 * the remaining attributes of the training data.
 *
 * <p>As a side effect the class index of {@code trainingData} is set to the
 * number of outcome attributes.
 *
 * @param instance
 *            tc instance
 * @param trainingData
 *            training data; also becomes the dataset of the returned instance
 * @param allClassLabels
 *            all labels
 * @return a sparse weka instance attached to {@code trainingData}
 * @throws Exception
 *             in case of errors
 */
public static weka.core.Instance tcInstanceToMekaInstance(Instance instance, Instances trainingData, List<String> allClassLabels) throws Exception {
    AttributeStore store = new AttributeStore();
    List<Attribute> labelAttributes = createOutcomeAttributes(allClassLabels);
    final int labelCount = labelAttributes.size();

    // in Meka, class label attributes have to go on top
    for (Attribute labelAttribute : labelAttributes) {
        store.addAttributeAtBegin(labelAttribute.name(), labelAttribute);
    }

    // everything after the label block is copied over from the training data
    final int totalAttributes = trainingData.numAttributes();
    int position = labelCount;
    while (position < totalAttributes) {
        Attribute featureAttribute = trainingData.attribute(position);
        store.addAttribute(featureAttribute.name(), featureAttribute);
        position++;
    }

    SparseInstance converted = new SparseInstance(1.0, getFeatureValues(store, instance));
    trainingData.setClassIndex(labelCount);
    converted.setDataset(trainingData);
    return converted;
}
Also used : SparseInstance(weka.core.SparseInstance) Attribute(weka.core.Attribute)

Example 14 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaUtils method tcInstanceToWekaInstance.

/**
 * Builds a Weka instance from a TC instance. The attribute layout is
 * reconstructed from all attributes of the training data except the last
 * one, followed by a freshly created outcome attribute.
 *
 * @param instance
 *            tc instance
 * @param trainingData
 *            training data; also becomes the dataset of the returned instance
 * @param allClasses
 *            all classes
 * @param isRegressionExperiment
 *            is regression
 * @return a sparse weka instance attached to {@code trainingData}
 * @throws Exception
 *             in case of errors
 */
public static weka.core.Instance tcInstanceToWekaInstance(Instance instance, Instances trainingData, List<String> allClasses, boolean isRegressionExperiment) throws Exception {
    AttributeStore store = new AttributeStore();

    // the trailing attribute of the training data is the outcome and is skipped here
    final int featureCount = trainingData.numAttributes() - 1;
    int position = 0;
    while (position < featureCount) {
        Attribute featureAttribute = trainingData.attribute(position);
        store.addAttribute(featureAttribute.name(), featureAttribute);
        position++;
    }

    // the outcome attribute is rebuilt from the class list and appended last
    Attribute outcome = createOutcomeAttribute(allClasses, isRegressionExperiment);
    store.addAttribute(outcome.name(), outcome);

    SparseInstance converted = new SparseInstance(1.0, getFeatureValues(store, instance));
    converted.setDataset(trainingData);
    return converted;
}
Also used : SparseInstance(weka.core.SparseInstance) Attribute(weka.core.Attribute)

Example 15 with Attribute

use of weka.core.Attribute in project dkpro-tc by dkpro.

the class WekaUtils method featureSelectionMultilabel.

/**
 * Feature selection using Mulan.
 *
 * <p>Ranks the feature attributes of the training data with the configured
 * evaluator, writes the evaluation of every feature attribute to the feature
 * selection results file, and configures a {@link Remove} filter that keeps
 * the first {@code numLabelsToKeep} ranked attributes plus all label
 * attributes.
 *
 * @param aContext
 *            Lab context
 * @param trainData
 *            training data
 * @param attributeEvaluator
 *            evaluator: first element is the evaluator class name, the
 *            remaining elements are passed to it as options
 * @param labelTransformationMethod
 *            transformation method; either
 *            {@code "LabelPowersetAttributeEvaluator"} or
 *            {@code "BinaryRelevanceAttributeEvaluator"}
 * @param numLabelsToKeep
 *            number of ranked attributes to keep (in addition to the label
 *            attributes, which are always kept)
 * @return the configured remove filter, or {@code null} if fewer attributes
 *         are available than requested, in which case no filtering is applied
 * @throws TextClassificationException
 *             in case of errors, including an unsupported transformation
 *             method
 */
public static Remove featureSelectionMultilabel(TaskContext aContext, Instances trainData, List<String> attributeEvaluator, String labelTransformationMethod, int numLabelsToKeep) throws TextClassificationException {
    // file to hold the results of attribute selection
    File fsResultsFile = getFile(aContext, TEST_TASK_OUTPUT_KEY, WekaTestTask.featureSelectionFile, AccessMode.READWRITE);
    // filter for reducing dimension of attributes
    Remove filterRemove = new Remove();
    try {
        MultiLabelInstances mulanInstances = convertMekaInstancesToMulanInstances(trainData);
        int numLabels = mulanInstances.getNumLabels();
        ASEvaluation eval = ASEvaluation.forName(attributeEvaluator.get(0), attributeEvaluator.subList(1, attributeEvaluator.size()).toArray(new String[0]));
        AttributeEvaluator attributeSelectionFilter;
        // the evaluator is selected by name and constructed by hand
        // (this is complicated due to missing commandline support of mulan):
        if (labelTransformationMethod.equals("LabelPowersetAttributeEvaluator")) {
            attributeSelectionFilter = new LabelPowersetAttributeEvaluator(eval, mulanInstances);
        } else if (labelTransformationMethod.equals("BinaryRelevanceAttributeEvaluator")) {
            attributeSelectionFilter = new BinaryRelevanceAttributeEvaluator(eval, mulanInstances, "max", "none", "rank");
        } else {
            throw new TextClassificationException("This Label Transformation Method is not supported.");
        }
        Ranker r = new Ranker();
        int[] result = r.search(attributeSelectionFilter, mulanInstances);
        // collect evaluation for *all* attributes and write to file
        StringBuilder evalFile = new StringBuilder();
        for (Attribute att : mulanInstances.getFeatureAttributes()) {
            evalFile.append(att.name())
                    .append(": ")
                    .append(attributeSelectionFilter.evaluateAttribute(att.index() - numLabels))
                    .append("\n");
        }
        FileUtils.writeStringToFile(fsResultsFile, evalFile.toString(), "utf-8");
        // less attributes than we want => no filtering
        // (explicit check instead of relying on System.arraycopy to throw)
        if (numLabelsToKeep > result.length) {
            return null;
        }
        // create a filter to reduce the dimension of the attributes:
        // the first numLabelsToKeep ranked attributes followed by all label indices
        int[] toKeep = new int[numLabelsToKeep + numLabels];
        System.arraycopy(result, 0, toKeep, 0, numLabelsToKeep);
        int[] labelIndices = mulanInstances.getLabelIndices();
        System.arraycopy(labelIndices, 0, toKeep, numLabelsToKeep, numLabels);
        filterRemove.setAttributeIndicesArray(toKeep);
        // inverted selection: the listed indices are the ones to retain
        filterRemove.setInvertSelection(true);
        filterRemove.setInputFormat(mulanInstances.getDataSet());
    } catch (ArrayIndexOutOfBoundsException e) {
        // retained for backward compatibility: any remaining index overflow is
        // still treated as "less attributes than we want => no filtering"
        return null;
    } catch (TextClassificationException e) {
        // already the declared exception type; rethrow it unchanged instead of
        // wrapping it in a second TextClassificationException
        throw e;
    } catch (Exception e) {
        throw new TextClassificationException(e);
    }
    return filterRemove;
}
Also used : TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) Attribute(weka.core.Attribute) Remove(weka.filters.unsupervised.attribute.Remove) MultiLabelInstances(mulan.data.MultiLabelInstances) AttributeEvaluator(weka.attributeSelection.AttributeEvaluator) BinaryRelevanceAttributeEvaluator(mulan.dimensionalityReduction.BinaryRelevanceAttributeEvaluator) LabelPowersetAttributeEvaluator(mulan.dimensionalityReduction.LabelPowersetAttributeEvaluator) Ranker(mulan.dimensionalityReduction.Ranker) FileNotFoundException(java.io.FileNotFoundException) InvalidDataFormatException(mulan.data.InvalidDataFormatException) IOException(java.io.IOException) ASEvaluation(weka.attributeSelection.ASEvaluation) File(java.io.File)

Aggregations

Attribute (weka.core.Attribute)28 ArrayList (java.util.ArrayList)12 Instances (weka.core.Instances)12 Feature (org.dkpro.tc.api.features.Feature)8 Instance (org.dkpro.tc.api.features.Instance)8 SparseInstance (weka.core.SparseInstance)5 Test (org.junit.Test)4 DenseInstance (weka.core.DenseInstance)4 Instance (weka.core.Instance)4 ArffSaver (weka.core.converters.ArffSaver)4 File (java.io.File)3 MultiLabelInstances (mulan.data.MultiLabelInstances)3 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)3 AttributeStore (org.dkpro.tc.ml.weka.util.AttributeStore)3 FastVector (weka.core.FastVector)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 Optional (java.util.Optional)2 FeatureType (org.dkpro.tc.api.features.FeatureType)2 Request (io.milton.http.Request)1