Search in sources :

Example 21 with TextClassificationException

use of org.dkpro.tc.api.exception.TextClassificationException in project dkpro-tc by dkpro.

the class CrfSuiteLoadModelConnector method getOutcomes.

/**
 * Collects the outcome labels of the {@link TextClassificationOutcome} annotations in the CAS.
 *
 * @param jcas
 *            the CAS to read the outcome annotations from
 * @param unit
 *            if non-null, only outcomes covered by this annotation are considered; if
 *            {@code null}, all outcomes in the CAS are used
 * @return the outcome strings, in the iteration order of the selected annotations
 * @throws TextClassificationException
 *             if no outcome annotation was selected
 */
private List<String> getOutcomes(JCas jcas, AnnotationFS unit) throws TextClassificationException {
    Collection<TextClassificationOutcome> annotated = (unit == null)
            ? JCasUtil.select(jcas, TextClassificationOutcome.class)
            : JCasUtil.selectCovered(jcas, TextClassificationOutcome.class, unit);

    // an instance without any outcome cannot be handled further
    if (annotated.isEmpty()) {
        throw new TextClassificationException("No outcome annotations present in current CAS.");
    }

    List<String> labels = new ArrayList<String>(annotated.size());
    for (TextClassificationOutcome annotation : annotated) {
        labels.add(annotation.getOutcome());
    }
    return labels;
}
Also used : TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList)

Example 22 with TextClassificationException

use of org.dkpro.tc.api.exception.TextClassificationException in project dkpro-tc by dkpro.

the class CrfSuiteLoadModelConnector method getInstancesInSequence.

/**
 * Builds one {@link Instance} for every {@link TextClassificationTarget} covered by the given
 * sequence annotation.
 *
 * @param featureExtractors
 *            extractors to run on each target; each one is cast to {@link FeatureExtractor}
 * @param jcas
 *            the CAS holding the sequence, its targets and the {@link JCasId} annotation
 * @param sequence
 *            the sequence whose covered targets are turned into instances
 * @param addInstanceId
 *            whether the instance-id feature is added to each instance
 * @param sequenceId
 *            id of the sequence, stored on every created instance
 * @return the created instances, in the order the targets were selected
 * @throws Exception
 *             if feature extraction fails (reported as
 *             {@link AnalysisEngineProcessException}) or any other step fails
 */
private List<Instance> getInstancesInSequence(FeatureExtractorResource_ImplBase[] featureExtractors, JCas jcas, TextClassificationSequence sequence, boolean addInstanceId, int sequenceId) throws Exception {
    int jcasId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    List<Instance> result = new ArrayList<Instance>();

    for (TextClassificationTarget target : JCasUtil.selectCovered(jcas, TextClassificationTarget.class, sequence)) {
        Instance current = new Instance();

        if (addInstanceId) {
            current.addFeature(InstanceIdFeature.retrieve(jcas, target, sequenceId));
        }

        // run every feature extractor on the target and collect its features
        try {
            for (FeatureExtractorResource_ImplBase extractor : featureExtractors) {
                FeatureExtractor unitExtractor = (FeatureExtractor) extractor;
                current.addFeatures(unitExtractor.extract(jcas, target));
            }
        } catch (TextClassificationException e) {
            throw new AnalysisEngineProcessException(e);
        }

        // attach the outcome label(s) and the bookkeeping ids
        current.setOutcomes(getOutcomes(jcas, target));
        current.setJcasId(jcasId);
        current.setSequenceId(sequenceId);
        current.setSequencePosition(target.getId());

        result.add(current);
    }
    return result;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) Instance(org.dkpro.tc.api.features.Instance) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) FeatureExtractorResource_ImplBase(org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Example 23 with TextClassificationException

use of org.dkpro.tc.api.exception.TextClassificationException in project dkpro-tc by dkpro.

the class LucenePMetaCollectorBase method process.

/**
 * Computes n-gram frequency distributions for the views {@code PART_ONE} and
 * {@code PART_TWO} of the given CAS, and for both views together, and adds every n-gram
 * to the matching field once per counted occurrence.
 *
 * @param jcas
 *            the CAS that provides the two views
 * @throws AnalysisEngineProcessException
 *             if a view cannot be obtained or the n-gram computation raises a
 *             {@link TextClassificationException}
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    JCas partOne;
    JCas partTwo;
    try {
        partOne = jcas.getView(PART_ONE);
        partTwo = jcas.getView(PART_TWO);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    List<JCas> bothViews = new ArrayList<JCas>();
    bothViews.add(partOne);
    bothViews.add(partTwo);

    FrequencyDistribution<String> partOneNgrams;
    FrequencyDistribution<String> partTwoNgrams;
    FrequencyDistribution<String> combinedNgrams;
    try {
        TextClassificationTarget targetOne = JCasUtil.selectSingle(partOne, TextClassificationTarget.class);
        TextClassificationTarget targetTwo = JCasUtil.selectSingle(partTwo, TextClassificationTarget.class);
        partOneNgrams = getNgramsFDView1(partOne, targetOne);
        partTwoNgrams = getNgramsFDView2(partTwo, targetTwo);
        combinedNgrams = getNgramsFD(bothViews);
    } catch (TextClassificationException e) {
        throw new AnalysisEngineProcessException(e);
    }

    // n-grams over both views together
    for (String term : combinedNgrams.getKeys()) {
        for (int occurrence = 0; occurrence < combinedNgrams.getCount(term); occurrence++) {
            addField(getFieldName(), term);
        }
    }

    // n-grams of the first view
    for (String term : partOneNgrams.getKeys()) {
        for (int occurrence = 0; occurrence < partOneNgrams.getCount(term); occurrence++) {
            addField(getFieldNameView1(), term);
        }
    }

    // n-grams of the second view
    for (String term : partTwoNgrams.getKeys()) {
        for (int occurrence = 0; occurrence < partTwoNgrams.getCount(term); occurrence++) {
            addField(getFieldNameView2(), term);
        }
    }
}
Also used : TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Example 24 with TextClassificationException

use of org.dkpro.tc.api.exception.TextClassificationException in project dkpro-tc by dkpro.

the class WekaUtils method featureSelectionMultilabel.

/**
 * Feature selection using Mulan.
 * <p>
 * Ranks the attributes of the training data with the given evaluator, writes the evaluation
 * of every feature attribute to the feature-selection results file obtained from the task
 * context, and configures a {@link Remove} filter from the leading entries of the ranking plus
 * the label attributes.
 *
 * @param aContext
 *            Lab context
 * @param trainData
 *            training data
 * @param attributeEvaluator
 *            evaluator specification: the evaluator class name at index 0, followed by its
 *            options
 * @param labelTransformationMethod
 *            transformation method; supported values are
 *            {@code "LabelPowersetAttributeEvaluator"} and
 *            {@code "BinaryRelevanceAttributeEvaluator"}
 * @param numLabelsToKeep
 *            number of leading entries of the ranking result that are kept in addition to the
 *            label attributes
 * @return the configured remove filter, or {@code null} if an
 *         {@link ArrayIndexOutOfBoundsException} occurred (fewer attributes available than
 *         requested), meaning no filtering should be applied
 * @throws TextClassificationException
 *             if the transformation method is not supported or any other error occurs
 */
public static Remove featureSelectionMultilabel(TaskContext aContext, Instances trainData, List<String> attributeEvaluator, String labelTransformationMethod, int numLabelsToKeep) throws TextClassificationException {
    // file to hold the results of attribute selection
    File fsResultsFile = getFile(aContext, TEST_TASK_OUTPUT_KEY, WekaTestTask.featureSelectionFile, AccessMode.READWRITE);
    // filter for reducing dimension of attributes
    Remove filterRemove = new Remove();
    try {
        MultiLabelInstances mulanInstances = convertMekaInstancesToMulanInstances(trainData);
        ASEvaluation eval = ASEvaluation.forName(attributeEvaluator.get(0), attributeEvaluator.subList(1, attributeEvaluator.size()).toArray(new String[0]));
        AttributeEvaluator attributeSelectionFilter;
        // The Mulan evaluator is instantiated by hand, selected by its name
        // (complicated due to missing commandline support of mulan).
        if (labelTransformationMethod.equals("LabelPowersetAttributeEvaluator")) {
            attributeSelectionFilter = new LabelPowersetAttributeEvaluator(eval, mulanInstances);
        } else if (labelTransformationMethod.equals("BinaryRelevanceAttributeEvaluator")) {
            attributeSelectionFilter = new BinaryRelevanceAttributeEvaluator(eval, mulanInstances, "max", "none", "rank");
        } else {
            throw new TextClassificationException("This Label Transformation Method is not supported.");
        }
        Ranker r = new Ranker();
        int[] result = r.search(attributeSelectionFilter, mulanInstances);
        // collect evaluation for *all* attributes and write to file
        // (the buffer is local to this call, so the unsynchronized StringBuilder suffices)
        StringBuilder evalFile = new StringBuilder();
        for (Attribute att : mulanInstances.getFeatureAttributes()) {
            evalFile.append(att.name())
                    .append(": ")
                    .append(attributeSelectionFilter.evaluateAttribute(att.index() - mulanInstances.getNumLabels()))
                    .append("\n");
        }
        FileUtils.writeStringToFile(fsResultsFile, evalFile.toString(), "utf-8");
        // create a filter to reduce the dimension of the attributes:
        // first the requested number of ranked entries, then all label indices
        int[] toKeep = new int[numLabelsToKeep + mulanInstances.getNumLabels()];
        System.arraycopy(result, 0, toKeep, 0, numLabelsToKeep);
        int[] labelIndices = mulanInstances.getLabelIndices();
        System.arraycopy(labelIndices, 0, toKeep, numLabelsToKeep, mulanInstances.getNumLabels());
        filterRemove.setAttributeIndicesArray(toKeep);
        // inverted selection: the listed indices are the ones that are retained
        filterRemove.setInvertSelection(true);
        filterRemove.setInputFormat(mulanInstances.getDataSet());
    } catch (ArrayIndexOutOfBoundsException e) {
        // less attributes than we want => no filtering
        return null;
    } catch (TextClassificationException e) {
        // already the declared exception type: propagate as-is instead of wrapping it again
        throw e;
    } catch (Exception e) {
        throw new TextClassificationException(e);
    }
    return filterRemove;
}
Also used : TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) Attribute(weka.core.Attribute) Remove(weka.filters.unsupervised.attribute.Remove) MultiLabelInstances(mulan.data.MultiLabelInstances) AttributeEvaluator(weka.attributeSelection.AttributeEvaluator) BinaryRelevanceAttributeEvaluator(mulan.dimensionalityReduction.BinaryRelevanceAttributeEvaluator) LabelPowersetAttributeEvaluator(mulan.dimensionalityReduction.LabelPowersetAttributeEvaluator) Ranker(mulan.dimensionalityReduction.Ranker) FileNotFoundException(java.io.FileNotFoundException) InvalidDataFormatException(mulan.data.InvalidDataFormatException) IOException(java.io.IOException) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ASEvaluation(weka.attributeSelection.ASEvaluation) BinaryRelevanceAttributeEvaluator(mulan.dimensionalityReduction.BinaryRelevanceAttributeEvaluator) LabelPowersetAttributeEvaluator(mulan.dimensionalityReduction.LabelPowersetAttributeEvaluator) File(java.io.File)

Example 25 with TextClassificationException

use of org.dkpro.tc.api.exception.TextClassificationException in project dkpro-tc by dkpro.

the class WekaFeatureEncoder method featureToAttributeUsingFeatureDescription.

/**
 * Creates the Weka {@link Attribute} that corresponds to a feature of the given type.
 * <ul>
 * <li>{@code NUMERIC} and {@code BOOLEAN} features become numeric attributes,</li>
 * <li>{@code STRING} features become string attributes,</li>
 * <li>{@code NOMINAL} features become nominal attributes whose values are the
 * {@code toString()} forms of the constants of the enum class named by {@code enumType},</li>
 * <li>any other type yields an attribute created with a {@code null} value list.</li>
 * </ul>
 *
 * @param featureName
 *            name of the feature; it is quoted with {@code Utils.quote} before use
 * @param value
 *            type of the feature
 * @param enumType
 *            fully qualified name of the enum class; only read for {@code NOMINAL} features
 * @return the attribute for the feature
 * @throws TextClassificationException
 *             if, for a {@code NOMINAL} feature, {@code enumType} is {@code null}, cannot be
 *             loaded, or does not name an enum class
 */
public static Attribute featureToAttributeUsingFeatureDescription(String featureName, FeatureType value, String enumType) throws TextClassificationException {
    String name = Utils.quote(featureName);
    Attribute attribute;
    // if value is a number then create a numeric attribute
    if (value.equals(FeatureType.NUMERIC) || value.equals(FeatureType.BOOLEAN)) {
        attribute = new Attribute(name);
    } else if (value.equals(FeatureType.STRING)) {
        attribute = new Attribute(name, true);
    } else if (value.equals(FeatureType.NOMINAL)) {
        // if value is an Enum then create a nominal attribute
        if (enumType == null) {
            throw new TextClassificationException("No enum type given for nominal feature [" + featureName + "].");
        }
        Class<?> enumClass;
        try {
            enumClass = Class.forName(enumType);
        } catch (ClassNotFoundException e) {
            throw new TextClassificationException(e);
        }
        // getEnumConstants() yields null when the class is not an enum; report that
        // through the declared exception instead of failing with a NullPointerException
        Object[] enumConstants = enumClass.getEnumConstants();
        if (enumConstants == null) {
            throw new TextClassificationException("Class [" + enumType + "] given for nominal feature [" + featureName + "] is not an enum type.");
        }
        ArrayList<String> attributeValues = new ArrayList<String>(enumConstants.length);
        for (Object enumConstant : enumConstants) {
            attributeValues.add(enumConstant.toString());
        }
        attribute = new Attribute(name, attributeValues);
    } else {
        attribute = new Attribute(name, (ArrayList<String>) null);
    }
    return attribute;
}
Also used : Attribute(weka.core.Attribute) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ArrayList(java.util.ArrayList)

Aggregations

TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)25 ArrayList (java.util.ArrayList)10 TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)7 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)6 IOException (java.io.IOException)5 Feature (org.dkpro.tc.api.features.Feature)5 File (java.io.File)4 JCas (org.apache.uima.jcas.JCas)4 ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException)4 FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)4 JCasId (org.dkpro.tc.api.type.JCasId)4 TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome)4 CASException (org.apache.uima.cas.CASException)3 PairFeatureExtractor (org.dkpro.tc.api.features.PairFeatureExtractor)3 FrequencyDistribution (de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution)2 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)2 SimilarityException (dkpro.similarity.algorithms.api.SimilarityException)2 HashSet (java.util.HashSet)2 FeatureExtractor (org.dkpro.tc.api.features.FeatureExtractor)2 Instance (org.dkpro.tc.api.features.Instance)2