use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class InstanceIdFeatureTest method instanceIdFeatureTest.
/**
 * Checks the id strings returned by the three {@code InstanceIdFeature.retrieve} overloads
 * for a CAS carrying a {@code JCasId} of 123 and a single classification target with id 0.
 *
 * @throws Exception
 *             if the analysis engine cannot be created or run
 */
@Test
public void instanceIdFeatureTest() throws Exception {
    AnalysisEngine engine = createEngine(NoOpAnnotator.class);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    engine.process(jcas);

    TextClassificationTarget unit1 = new TextClassificationTarget(jcas, 0, 1);
    unit1.setId(0);
    unit1.addToIndexes();

    JCasId id = new JCasId(jcas);
    id.setId(123);
    id.addToIndexes();

    Feature feature = InstanceIdFeature.retrieve(jcas, unit1);
    Feature feature2 = InstanceIdFeature.retrieve(jcas);
    Feature feature3 = InstanceIdFeature.retrieve(jcas, unit1, 5);

    // assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the two values the right way round.
    assertEquals("123_0", feature.getValue());
    assertEquals("123", feature2.getValue());
    assertEquals("123_5_0", feature3.getValue());
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class InstanceExtractor method getUnitInstances.
/**
 * Creates one {@link Instance} per {@link TextClassificationTarget} found in the CAS.
 * <p>
 * Each instance receives the features of every configured feature extractor (sparse or dense,
 * depending on {@code supportSparseFeatures}), optionally preceded by an instance-id feature,
 * plus its outcomes, weight, the id of the CAS and the id of the target as sequence position.
 *
 * @param jcas
 *            the CAS to read targets and the {@code JCasId} from
 * @param supportSparseFeatures
 *            {@code true} to collect features via {@code getSparse}, {@code false} to use
 *            {@code getDense}
 * @return the instances, in the iteration order of the selected targets
 * @throws TextClassificationException
 *             if a configured extractor is not a {@link FeatureExtractor}
 */
public List<Instance> getUnitInstances(JCas jcas, boolean supportSparseFeatures) throws TextClassificationException {
    List<Instance> result = new ArrayList<Instance>();
    int casId = JCasUtil.selectSingle(jcas, JCasId.class).getId();

    for (TextClassificationTarget target : JCasUtil.select(jcas, TextClassificationTarget.class)) {
        Instance current = new Instance();

        if (addInstanceId) {
            Feature idFeature = InstanceIdFeature.retrieve(jcas, target);
            current.addFeature(idFeature);
        }

        for (FeatureExtractorResource_ImplBase extractor : featureExtractors) {
            boolean usable = extractor instanceof FeatureExtractor;
            if (!usable) {
                throw new TextClassificationException("Feature extractor does not implement interface [" + FeatureExtractor.class.getName() + "]: " + extractor.getResourceName());
            }
            if (!supportSparseFeatures) {
                current.addFeatures(getDense(jcas, target, extractor));
            } else {
                current.addFeatures(getSparse(jcas, target, extractor));
            }
        }

        // attach outcome label(s) and bookkeeping information
        current.setOutcomes(getOutcomes(jcas, target));
        current.setWeight(getWeight(jcas, target));
        current.setJcasId(casId);
        // instance.setSequenceId(sequenceId);
        current.setSequencePosition(target.getId());

        result.add(current);
    }
    return result;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class FeatureTestUtil method assertFeatures.
/**
 * Asserts that {@code features} contains at least one feature named {@code expectedName}
 * and that every feature with that name holds a numeric value within {@code epsilon} of
 * {@code expectedValue}.
 *
 * @param expectedName
 *            name of the feature to look for
 * @param expectedValue
 *            value the matching feature is expected to have
 * @param features
 *            the features to search; must not be {@code null}
 * @param epsilon
 *            maximum tolerated difference between expected and actual value
 */
public static void assertFeatures(String expectedName, double expectedValue, Set<Feature> features, double epsilon) {
    Assert.assertNotNull("Feature set must not be null", features);
    boolean found = false;
    for (Feature f : features) {
        if (f.getName().equals(expectedName)) {
            found = true;
            Object value = f.getValue();
            // Accept any Number (Integer, Long, Float, ...) rather than only Double, so that
            // integer-valued numeric features fail with a comparison, not a ClassCastException.
            Assert.assertTrue("Feature [" + expectedName + "] has a non-numeric value: " + value, value instanceof Number);
            Assert.assertEquals("Unexpected value for feature [" + expectedName + "]", expectedValue, ((Number) value).doubleValue(), epsilon);
        }
    }
    Assert.assertTrue("No feature named [" + expectedName + "] was found", found);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class WekaDataWriter method getFeatureValues.
/**
 * Encodes the features of {@code instance} as an array of doubles with one slot per
 * attribute in {@code attributeStore}.
 * <p>
 * Numeric features are stored as their double value, boolean features as 1.0/0.0 (or the
 * numeric value they already carry), and all other features as the index of their string
 * value within the corresponding attribute. Slots for which no feature is written keep the
 * array default of 0.0.
 *
 * @param attributeStore
 *            provides the attributes and their offsets in the result array
 * @param instance
 *            the instance whose features are encoded
 * @return the encoded values, indexed by attribute offset
 * @throws IllegalArgumentException
 *             if a non-numeric, non-boolean feature maps to an attribute that is neither
 *             nominal nor string, or its value is not declared for a nominal attribute
 */
private double[] getFeatureValues(AttributeStore attributeStore, Instance instance) {
    double[] values = new double[attributeStore.getAttributes().size()];

    for (Feature feature : instance.getFeatures()) {
        try {
            Attribute attribute = attributeStore.getAttribute(feature.getName());
            Object raw = feature.getValue();
            double encoded;

            if (feature.getType() == FeatureType.NUMERIC) {
                // numeric attribute: take the value as-is
                encoded = ((Number) raw).doubleValue();
            } else if (feature.getType() == FeatureType.BOOLEAN) {
                if (raw instanceof Boolean) {
                    // true/false is mapped onto 1.0/0.0
                    encoded = ((Boolean) raw).booleanValue() ? 1.0d : 0.0d;
                } else if (raw instanceof Double) {
                    // already numeric
                    encoded = ((Double) raw).doubleValue();
                } else {
                    // anything else is expected to be an Integer
                    encoded = ((Integer) raw).doubleValue();
                }
            } else {
                // nominal or string attribute: encode the value by its index
                boolean symbolic = attribute.isNominal() || attribute.isString();
                if (!symbolic) {
                    throw new IllegalArgumentException("Attribute neither nominal nor string: " + raw);
                }
                String text = raw.toString();
                int index = attribute.indexOfValue(text);
                if (index == -1) {
                    if (attribute.isNominal()) {
                        throw new IllegalArgumentException("Value not defined for given nominal attribute!");
                    }
                    // string attributes may grow: register the value, then look it up again
                    attribute.addStringValue(text);
                    index = attribute.indexOfValue(text);
                }
                encoded = index;
            }

            int slot = attributeStore.getAttributeOffset(attribute.name());
            if (slot != -1) {
                values[slot] = encoded;
            }
        } catch (NullPointerException e) {
            // ignore unseen attributes
            // NOTE(review): this also hides any other NullPointerException raised above
            // (e.g. a null feature value); the affected feature is silently skipped.
        }
    }
    return values;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class WekaUtilTest method instanceToArffTest.
/**
 * Writes three small instances (two numeric features, one string feature whose name
 * contains braces, and one enum-valued feature) to an ARFF file through
 * {@code WekaUtils.instanceListToArffFile} and prints the resulting file content.
 *
 * @throws Exception
 *             if the output file cannot be created, written or read
 */
@Test
public void instanceToArffTest() throws Exception {
    Instance i1 = new Instance();
    i1.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));
    i1.addFeature(new Feature("feature4", Values.VALUE_1, FeatureType.NUMERIC));
    i1.setOutcomes("1");

    Instance i2 = new Instance();
    i2.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i2.addFeature(new Feature("feature4", Values.VALUE_2, FeatureType.NUMERIC));
    i2.setOutcomes("2");

    Instance i3 = new Instance();
    i3.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i3.addFeature(new Feature("feature4", Values.VALUE_3, FeatureType.NUMERIC));
    i3.setOutcomes("2");

    List<Instance> iList = new ArrayList<>();
    iList.add(i1);
    iList.add(i2);
    iList.add(i3);

    File outfile = new File("target/test/out.txt");
    // Create only the parent directory; calling mkdirs() on the file itself would turn
    // "out.txt" into a directory and leave no regular file to write to.
    outfile.getParentFile().mkdirs();
    outfile.createNewFile();
    outfile.deleteOnExit();

    WekaUtils.instanceListToArffFile(outfile, iList);
    System.out.println(FileUtils.readFileToString(outfile, "utf-8"));
}
Aggregations