Use of org.dkpro.tc.api.exception.TextClassificationException in project dkpro-tc by dkpro.
Class CrfSuiteLoadModelConnector, method getOutcomes.
/**
 * Collects the outcome labels stored in the CAS as plain strings.
 *
 * @param jcas
 *            CAS holding the {@code TextClassificationOutcome} annotations
 * @param unit
 *            annotation whose covered outcomes are read; when {@code null}, every outcome
 *            annotation in the CAS is read instead
 * @return the outcome value of each annotation found, in the iteration order of the selection
 * @throws TextClassificationException
 *             if no outcome annotation was found
 */
private List<String> getOutcomes(JCas jcas, AnnotationFS unit) throws TextClassificationException {
    // restrict the lookup to the unit when one is given, otherwise scan the whole CAS
    Collection<TextClassificationOutcome> found = (unit != null)
            ? JCasUtil.selectCovered(jcas, TextClassificationOutcome.class, unit)
            : JCasUtil.select(jcas, TextClassificationOutcome.class);
    if (found.isEmpty()) {
        throw new TextClassificationException("No outcome annotations present in current CAS.");
    }
    List<String> labels = new ArrayList<String>();
    for (TextClassificationOutcome annotation : found) {
        labels.add(annotation.getOutcome());
    }
    return labels;
}
Use of org.dkpro.tc.api.exception.TextClassificationException in project dkpro-tc by dkpro.
Class CrfSuiteLoadModelConnector, method getInstancesInSequence.
/**
 * Builds one {@code Instance} for every classification target covered by the given sequence.
 *
 * @param featureExtractors
 *            extractors to run on each target; each element is cast to {@code FeatureExtractor}
 * @param jcas
 *            CAS containing the targets, the outcomes and a single {@code JCasId} annotation
 * @param sequence
 *            sequence annotation whose covered targets are processed
 * @param addInstanceId
 *            whether an instance-id feature is added to each instance
 * @param sequenceId
 *            id stored on every instance and passed to the instance-id feature
 * @return the instances, one per covered target, in selection order
 * @throws Exception
 *             an {@code AnalysisEngineProcessException} if a feature extractor fails with a
 *             {@code TextClassificationException}; other failures propagate unchanged
 */
private List<Instance> getInstancesInSequence(FeatureExtractorResource_ImplBase[] featureExtractors, JCas jcas, TextClassificationSequence sequence, boolean addInstanceId, int sequenceId) throws Exception {
    List<Instance> result = new ArrayList<Instance>();
    int casId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    List<TextClassificationTarget> targetsInSequence = JCasUtil.selectCovered(jcas, TextClassificationTarget.class, sequence);
    for (TextClassificationTarget target : targetsInSequence) {
        Instance current = new Instance();
        if (addInstanceId) {
            current.addFeature(InstanceIdFeature.retrieve(jcas, target, sequenceId));
        }
        // run every feature extractor on the target and collect what it produces
        try {
            for (int i = 0; i < featureExtractors.length; i++) {
                FeatureExtractor extractor = (FeatureExtractor) featureExtractors[i];
                current.addFeatures(extractor.extract(jcas, target));
            }
        } catch (TextClassificationException e) {
            throw new AnalysisEngineProcessException(e);
        }
        // attach the outcome label(s) and the bookkeeping ids
        List<String> targetOutcomes = getOutcomes(jcas, target);
        current.setOutcomes(targetOutcomes);
        current.setJcasId(casId);
        current.setSequenceId(sequenceId);
        current.setSequencePosition(target.getId());
        result.add(current);
    }
    return result;
}
Use of org.dkpro.tc.api.exception.TextClassificationException in project dkpro-tc by dkpro.
Class LucenePMetaCollectorBase, method process.
/**
 * Collects n-grams from the two views of a pair CAS and records them as fields.
 * <p>
 * Three frequency distributions are computed: one per view and one over both views together.
 * Each n-gram is then added as a field as many times as its count, under the field name that
 * belongs to its distribution.
 *
 * @param jcas
 *            CAS providing the {@code PART_ONE} and {@code PART_TWO} views
 * @throws AnalysisEngineProcessException
 *             if a view cannot be obtained or n-gram extraction fails with a
 *             {@code TextClassificationException}
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    JCas firstView;
    JCas secondView;
    try {
        firstView = jcas.getView(PART_ONE);
        secondView = jcas.getView(PART_TWO);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    List<JCas> bothViews = new ArrayList<JCas>();
    bothViews.add(firstView);
    bothViews.add(secondView);

    FrequencyDistribution<String> firstViewCounts;
    FrequencyDistribution<String> secondViewCounts;
    FrequencyDistribution<String> combinedCounts;
    try {
        // both targets are looked up before any n-gram extraction starts
        TextClassificationTarget firstTarget = JCasUtil.selectSingle(firstView, TextClassificationTarget.class);
        TextClassificationTarget secondTarget = JCasUtil.selectSingle(secondView, TextClassificationTarget.class);
        firstViewCounts = getNgramsFDView1(firstView, firstTarget);
        secondViewCounts = getNgramsFDView2(secondView, secondTarget);
        combinedCounts = getNgramsFD(bothViews);
    } catch (TextClassificationException e) {
        throw new AnalysisEngineProcessException(e);
    }

    // n-grams over both views
    for (String ngram : combinedCounts.getKeys()) {
        int written = 0;
        while (written < combinedCounts.getCount(ngram)) {
            addField(getFieldName(), ngram);
            written++;
        }
    }
    // n-grams of the first view
    for (String ngram : firstViewCounts.getKeys()) {
        int written = 0;
        while (written < firstViewCounts.getCount(ngram)) {
            addField(getFieldNameView1(), ngram);
            written++;
        }
    }
    // n-grams of the second view
    for (String ngram : secondViewCounts.getKeys()) {
        int written = 0;
        while (written < secondViewCounts.getCount(ngram)) {
            addField(getFieldNameView2(), ngram);
            written++;
        }
    }
}
Use of org.dkpro.tc.api.exception.TextClassificationException in project dkpro-tc by dkpro.
Class WekaUtils, method featureSelectionMultilabel.
/**
 * Feature selection using Mulan.
 * <p>
 * Ranks the feature attributes of the training data with the configured evaluator, writes the
 * evaluation score of every feature attribute to the feature-selection results file, and
 * configures a {@code Remove} filter (with inverted selection) over the top-ranked attribute
 * indices plus all label indices.
 *
 * @param aContext
 *            Lab context used to locate the feature-selection results file
 * @param trainData
 *            training data
 * @param attributeEvaluator
 *            evaluator specification: the first element is the evaluator class name, the
 *            remaining elements are passed to it as options
 * @param labelTransformationMethod
 *            transformation method; either {@code "LabelPowersetAttributeEvaluator"} or
 *            {@code "BinaryRelevanceAttributeEvaluator"}
 * @param numLabelsToKeep
 *            number of top-ranked attribute indices to retain (despite the parameter name, the
 *            label attributes are always retained in addition)
 * @return the configured remove filter, or {@code null} if fewer ranked attributes are
 *         available than requested (no filtering)
 * @throws TextClassificationException
 *             if the transformation method is not supported or any other error occurs
 */
public static Remove featureSelectionMultilabel(TaskContext aContext, Instances trainData, List<String> attributeEvaluator, String labelTransformationMethod, int numLabelsToKeep) throws TextClassificationException {
    // file to hold the results of attribute selection
    File fsResultsFile = getFile(aContext, TEST_TASK_OUTPUT_KEY, WekaTestTask.featureSelectionFile, AccessMode.READWRITE);
    // filter for reducing dimension of attributes
    Remove filterRemove = new Remove();
    try {
        MultiLabelInstances mulanInstances = convertMekaInstancesToMulanInstances(trainData);
        ASEvaluation eval = ASEvaluation.forName(attributeEvaluator.get(0), attributeEvaluator.subList(1, attributeEvaluator.size()).toArray(new String[0]));
        AttributeEvaluator attributeSelectionFilter;
        // Only the two transformation methods below are supported; configuring further ones
        // is complicated due to missing command-line support in Mulan.
        if (labelTransformationMethod.equals("LabelPowersetAttributeEvaluator")) {
            attributeSelectionFilter = new LabelPowersetAttributeEvaluator(eval, mulanInstances);
        } else if (labelTransformationMethod.equals("BinaryRelevanceAttributeEvaluator")) {
            attributeSelectionFilter = new BinaryRelevanceAttributeEvaluator(eval, mulanInstances, "max", "none", "rank");
        } else {
            throw new TextClassificationException("This Label Transformation Method is not supported.");
        }
        Ranker r = new Ranker();
        int[] result = r.search(attributeSelectionFilter, mulanInstances);
        // collect evaluation for *all* attributes and write to file
        StringBuilder evalFile = new StringBuilder();
        for (Attribute att : mulanInstances.getFeatureAttributes()) {
            // NOTE(review): the index shift assumes the label attributes precede the feature
            // attributes in the data set - confirm against the Meka-to-Mulan conversion
            evalFile.append(att.name()).append(": ").append(attributeSelectionFilter.evaluateAttribute(att.index() - mulanInstances.getNumLabels())).append("\n");
        }
        FileUtils.writeStringToFile(fsResultsFile, evalFile.toString(), "utf-8");
        // fewer ranked attributes than we want => no filtering; checked explicitly instead of
        // depending on the exact exception subtype raised by System.arraycopy
        if (numLabelsToKeep > result.length) {
            return null;
        }
        // create a filter to reduce the dimension of the attributes
        int[] toKeep = new int[numLabelsToKeep + mulanInstances.getNumLabels()];
        System.arraycopy(result, 0, toKeep, 0, numLabelsToKeep);
        int[] labelIndices = mulanInstances.getLabelIndices();
        System.arraycopy(labelIndices, 0, toKeep, numLabelsToKeep, mulanInstances.getNumLabels());
        filterRemove.setAttributeIndicesArray(toKeep);
        filterRemove.setInvertSelection(true);
        filterRemove.setInputFormat(mulanInstances.getDataSet());
    } catch (ArrayIndexOutOfBoundsException e) {
        // retained for backward compatibility: any remaining index overflow is still treated
        // as "less attributes than we want => no filtering"
        return null;
    } catch (TextClassificationException e) {
        // already the declared exception type; rethrow as-is so its message is not buried
        // inside a second TextClassificationException
        throw e;
    } catch (Exception e) {
        throw new TextClassificationException(e);
    }
    return filterRemove;
}
Use of org.dkpro.tc.api.exception.TextClassificationException in project dkpro-tc by dkpro.
Class WekaFeatureEncoder, method featureToAttributeUsingFeatureDescription.
/**
 * Creates the Weka attribute that corresponds to a feature description.
 *
 * @param featureName
 *            name of the feature; it is quoted before being used as the attribute name
 * @param value
 *            declared type of the feature
 * @param enumType
 *            fully qualified name of the enum class that supplies the nominal values; only
 *            read when {@code value} is {@code FeatureType.NOMINAL}
 * @return the attribute for the feature
 * @throws TextClassificationException
 *             if the class named by {@code enumType} cannot be loaded or is not an enum
 */
public static Attribute featureToAttributeUsingFeatureDescription(String featureName, FeatureType value, String enumType) throws TextClassificationException {
    String name = Utils.quote(featureName);
    // if value is a number (or a boolean) then create a numeric attribute
    if (value.equals(FeatureType.NUMERIC) || value.equals(FeatureType.BOOLEAN)) {
        return new Attribute(name);
    }
    // string features are created through the (name, true) constructor
    if (value.equals(FeatureType.STRING)) {
        return new Attribute(name, true);
    }
    // if value is an enum then create a nominal attribute from the enum's constants
    if (value.equals(FeatureType.NOMINAL)) {
        Class<?> enumClass;
        try {
            enumClass = Class.forName(enumType);
        } catch (ClassNotFoundException e) {
            throw new TextClassificationException(e);
        }
        Object[] enumConstants = enumClass.getEnumConstants();
        // getEnumConstants() returns null for a class that is not an enum; report that
        // explicitly instead of failing with a NullPointerException below
        if (enumConstants == null) {
            throw new TextClassificationException("The type [" + enumType + "] given for nominal feature [" + featureName + "] is not an enum.");
        }
        ArrayList<String> attributeValues = new ArrayList<String>(enumConstants.length);
        for (Object enumConstant : enumConstants) {
            attributeValues.add(enumConstant.toString());
        }
        return new Attribute(name, attributeValues);
    }
    // any other feature type: attribute built with a null value list
    // NOTE(review): presumably Weka treats a null value list as a string attribute - confirm
    return new Attribute(name, (ArrayList<String>) null);
}
Aggregations